## Load libraries
library(covid19)
library(ggplot2)
library(lubridate)
library(dplyr)
library(ggplot2)
library(sp)
library(raster)
library(viridis)
library(ggthemes)
library(sf)
library(rnaturalearth)
library(rnaturalearthdata)
library(RColorBrewer)
library(readr)
library(zoo)
library(tidyr)
options(scipen = '999')
fl <- df %>%
  filter(district == 'Florida')

ggplot(data = df,
       aes(x = date,
           y = cases)) +
  geom_line()

Municipalities

pd <- muni %>%
  filter(date == max(date)) %>%
    # Get confirmed cases by population
    mutate(p = confirmed_cases / pop * 100000) %>%
  arrange(desc(p))
c25 <- c(
  "dodgerblue2", "#E31A1C", # red
  "green4",
  "#6A3D9A", # purple
  "#FF7F00", # orange
  "black", "gold1",
  "skyblue2", "#FB9A99", # lt pink
  "palegreen2",
  "#CAB2D6", # lt purple
  "#FDBF6F", # lt orange
  "gray70", "khaki2",
  "maroon", "orchid1", "deeppink1", "blue1", "steelblue4",
  "darkturquoise", "green1", "yellow4", "yellow3",
  "darkorange4", "brown"
)

comarcas <- sort(unique(muni$ComarcaDescripcio))
for(i in 1:length(comarcas)){
  this_comarca <- comarcas[i]
  pd <- muni %>%
    # Get confirmed cases by population
    mutate(p = confirmed_cases / pop * 100000) %>%
    dplyr::filter(ComarcaDescripcio %in% this_comarca)
  
  # pie(rep(1, 25), col = c25)
  cols <- colorRampPalette(c25)(length(unique(pd$MunicipiDescripcio)))
  
  g <- ggplot(data = pd,
         aes(x = date,
             y = p)) +
    geom_line(aes(group = MunicipiDescripcio,
                  color = MunicipiDescripcio,
                  size = pop),
              alpha = 0.8) +
    # scale_y_log10() +
    # facet_wrap(~MunicipiDescripcio, scales = 'free_y') +
      scale_color_manual(name = '', values = cols) +
    databrew::theme_simple() +
    labs(x = 'Data',
         y = 'Casos per 100.000',
         title = paste0(this_comarca),
         subtitle = 'Incidència acumulada per 100.000 habitants') +
    scale_size(name = 'Població') +
    guides(color = guide_legend(override.aes = list(size = 2)))
  print(g)
  # Sys.sleep(10)
}

Cumulative incidence map, Catalonia, by municipality

# map <- municipios[municipios@data$id %in% muni$MunicipiCodi,]
catalan_codes <- c('43', '08', '25', '17')
map <- municipios[substr(municipios@data$id, 1, 2) %in% catalan_codes, ]

map_fortified <- fortify(map, region = 'id')
# Define function for getting each day
get_day <- function(the_date = Sys.Date()-3){
  pd <- muni %>% filter(date == the_date)
  joined <- left_join(map_fortified, pd %>% dplyr::rename(id = MunicipiCodi)) %>%
    mutate(p = confirmed_cases / pop * 100000)
  
  ggplot(data = joined,
         aes(x = long,
             y = lat,
             fill = p)) +
    geom_polygon(aes(group = id)) +
    scale_fill_gradientn(name = '',
                         colors = c('white', 'yellow', 'darkorange', 'red', 'darkred', 'black'),
                       # colors = rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral')),
                       limits = c(0, 10000)) +
    xlim(1,2.5) +
    ylim(41, 42.1)
}
dates <- seq(as.Date('2020-03-08'),
             max(muni$date),
             by = 1)

for(i in 1:length(dates)){
  this_date <- dates[i]
  get_day(the_date = this_date)
}
# Excess mortality spain

pd <- excess %>% dplyr::rename(ccaa = region) %>% 
  filter(ccaa != 'Spain') %>%
  left_join(esp_pop %>%
              mutate(ccaa = ifelse(ccaa == 'Baleares', 'Balears, Illes',
                                   ifelse(ccaa == 'CLM', 'Castile-La Mancha',
                                          ifelse(ccaa == 'CyL', 'Castile & León',
                                                 ifelse(ccaa == 'C. Valenciana', 'Comunitat Valenciana', ccaa)))))) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalonia', ccaa)) %>%
  mutate(ccaa = ifelse(ccaa == 'Comunitat Valenciana', 'Valencia', ccaa)) %>%
  mutate(ccaa = ifelse(ccaa == 'País Vasco', 'Basque country', ccaa)) %>%
  mutate(date = end_date)

# Calculate percentage of normal
pd <- pd %>%
  mutate(p = total_deaths / expected_deaths * 100) %>% filter(!ccaa %in% c('Ceuta', 'Melilla', 'Canarias')) %>%
  mutate(type = ifelse(p > 100, 'Above\nexpected\n', 'Below\nexpected\n'))

# pd <- pd %>% filter(ccaa %in% c('Castile & León',
#                                 'Castile-La Mancha',
#                                 'Catalonia',
#                                 'Madrid'))
pd <- pd %>% filter(date > '2020-01-01')

ggplot(data = pd,
       aes(x = date,
           y = p,
           group = ccaa)) +
  geom_hline(yintercept = 100, color = 'darkgrey') +
  # geom_line() +
  # geom_area(aes(ymin = 100)) +
  geom_ribbon(aes(ymin=100, ymax=p), fill = 'darkgrey', alpha = 0.5) +
  geom_line() +
  geom_point(aes(color = type), alpha = 0.7) +
  facet_wrap(~ccaa) +
  scale_color_manual(name = '',
                     values = c('darkred', 'blue')) +
  # scale_fill_manual(name = '',
  #                    values = c('darkred', 'blue')) +
  databrew::theme_simple() +
  theme(legend.position = 'right',
        strip.text = element_text(size = 12, hjust = 0.5)) +
  labs(x = '',
       y = 'Percent',
       title = 'Excess mortality by region',
       caption = 'Raw from the Economist: https://github.com/TheEconomist/covid-19-excess-deaths-tracker/blob/master/output-data/historical-deaths/spain_weekly_deaths.csv\nChart by Joe Brew. Ceuta, Melilla, and Canarias removed due to low numbers.') 

  # geom_hline(yintercept = 100)
ggsave('~/Desktop/plot.png')
pd <- excess %>% dplyr::rename(ccaa = region) %>% 
  filter(ccaa != 'Spain') %>%
  left_join(esp_pop %>%
              mutate(ccaa = ifelse(ccaa == 'Baleares', 'Balears, Illes',
                                   ifelse(ccaa == 'CLM', 'Castile-La Mancha',
                                          ifelse(ccaa == 'CyL', 'Castile & León',
                                                 ifelse(ccaa == 'C. Valenciana', 'Comunitat Valenciana', ccaa)))))) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalonia', ccaa)) %>%
  mutate(ccaa = ifelse(ccaa == 'Comunitat Valenciana', 'Valencia', ccaa)) %>%
  mutate(ccaa = ifelse(ccaa == 'País Vasco', 'Basque country', ccaa)) %>%
  mutate(date = end_date)

# Calculate percentage of normal
pd <- pd %>%
  mutate(p = total_deaths / expected_deaths * 100) %>% filter(!ccaa %in% c('Ceuta', 'Melilla', 'Canarias')) %>%
  mutate(type = ifelse(p > 100, 'Above\nexpected\n', 'Below\nexpected\n'))

# pd <- pd %>% filter(ccaa %in% c('Castile & León',
#                                 'Castile-La Mancha',
#                                 'Catalonia',
#                                 'Madrid'))
pd <- pd %>% filter(date > '2020-01-01')
pd$grp <- ifelse(grepl('Madrid|Castil|Rioj|Catal|Navarr', pd$ccaa), pd$ccaa, 'Others')
cols <- c(RColorBrewer::brewer.pal(n = length(unique(pd$grp))-1, 'Set1'), 'black')
cols[length(cols)-1] <- 'pink'
pd <- pd %>% arrange(desc(p))
pd$ccaa <- factor(pd$ccaa, levels = unique(pd$ccaa))
ggplot(data = pd,
       aes(x = date,
           y = p,
           group = ccaa)) +
  geom_hline(yintercept = 100, color = 'darkgrey') +
  geom_line(data = pd %>% filter(grp == 'Others'),
            aes(color = grp), alpha = 0.7) +
    geom_line(data = pd %>% filter(grp != 'Others'),
            aes(color = grp),
            size = 2, alpha = 0.8) +
  databrew::theme_simple() +
  theme(legend.position = 'right',
        strip.text = element_text(size = 12, hjust = 0.5)) +
  scale_color_manual(name = '',
                     values = cols) +
  labs(x = '',
       y = 'Percent',
       title = 'Excess mortality by region',
       caption = 'Raw from the Economist: https://github.com/TheEconomist/covid-19-excess-deaths-tracker/blob/master/output-data/historical-deaths/spain_weekly_deaths.csv\nChart by Joe Brew. Ceuta, Melilla, and Canarias removed due to low numbers.') 

ggsave('~/Desktop/plot2.png')
pd <- df_country %>%
  filter(country == 'India')

ggplot(data = pd,
       aes(x = date,
           y = cases_non_cum)) +
  geom_point() +
  geom_segment(aes(xend = date, yend = 0)) +
  theme_simple() +
  labs(x = 'Date',
       y = 'Incident cases',
       title = 'INDIA: Confirmed COVID-19 cases')

pd <- esp_df %>%
  filter(ccaa %in% c('Cataluña', 'Madrid'))
ggplot(data = pd,
       aes(x = date,
           y = cases_non_cum)) +
  geom_point() +
  geom_segment(aes(xend = date, yend = 0)) +
  theme_simple() +
  labs(x = 'Date',
       y = 'Incident cases',
       title = 'Confirmed COVID-19 cases') +
  facet_wrap(~ccaa)

# Africa cases
# Number of Africa countries
pd <- df_country
pd %>% left_join(world_pop) %>%
  # filter(sub_region %in% c('Sub-Saharan Africa')) %>%
  filter(region %in% 'Africa') %>%
  filter(cases > 0) %>%
  group_by(date) %>%
  summarise(cases = sum(cases))
# A tibble: 121 x 2
   date       cases
   <date>     <dbl>
 1 2020-02-25     1
 2 2020-02-26     1
 3 2020-02-27     1
 4 2020-02-28     2
 5 2020-02-29     2
 6 2020-03-01     2
 7 2020-03-02     6
 8 2020-03-03     9
 9 2020-03-04    19
10 2020-03-05    21
# … with 111 more rows
# Number of Africa countries
pd <- df_country
pd <- pd %>% left_join(world_pop) %>%
  filter(sub_region %in% c('Sub-Saharan Africa')) %>%
  filter(cases > 0)
pd <- pd %>% group_by(date) %>% tally

ggplot(data = pd,
       aes(x = date,
           y = n)) +
  # geom_line() +
  geom_area(fill = 'darkorange',
            color = 'black',
            alpha = 0.6) +
  theme_simple() +
  labs(x = 'Date',
       y = 'Countries',
       title = 'Sub-Saharan African countries with confirmed COVID-19 cases')

# Overall Africa cases
pd <- df_country
pd <- pd %>% left_join(world_pop) %>%
  filter(sub_region %in% c('Sub-Saharan Africa'))
x = pd %>%
  group_by(date) %>%
  summarise(cases = sum(cases),
            deaths = sum(deaths),
            n = length(country[cases > 0]))

x %>% filter(date == '2020-04-13' | date == '2020-05-13' |  date == max(date) | date == '2020-03-13')
# A tibble: 4 x 4
  date        cases deaths     n
  <date>      <dbl>  <dbl> <int>
1 2020-03-13     45      0    10
2 2020-04-13   8239    166    42
3 2020-05-13  44563   1025    44
4 2020-06-24 234314   4516    44
# Tests
pd <- testing
entity_split <- strsplit(pd$Entity, split = ' - ')
pd$country <- unlist(lapply(entity_split, function(x){x[1]}))
pd$key <- unlist(lapply(entity_split, function(x){x[2]}))
# pd <- pd %>% filter(key == 'tests performed')
pd <- pd %>% left_join(world_pop)# %>%
  # filter(sub_region %in% c('Sub-Saharan Africa', 'Southern Europe', 'Northern Europe', 'Western Europe'))

ggplot(data = pd,
       aes(x = Date,
           y = `Cumulative total`,
           group = country)) +
  geom_line(aes(color = country))

x = pd %>% group_by(country) %>%
  filter(Date == max(Date)) %>%
  ungroup %>%
  mutate(sub_region = ifelse(sub_region == 'Sub-Saharan Africa',
                             'SSA', 'Other')) %>%
  filter(!is.na(sub_region)) %>%
  arrange(`Cumulative total`)
x$sub_region <- factor(x$sub_region, levels = unique(x$sub_region))
x$country <- factor(x$country, levels = unique(x$country))
ggplot(data = x,
       aes(x = country,
           y = `Cumulative total`)) +
  geom_bar(aes(fill = sub_region), stat = 'identity') +
  theme_simple() +
  scale_fill_manual(name = '',
                    values = c('grey', 'darkorange')) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(x= 'Country',
       title = 'Tests administered')

ggsave('~/Desktop/man/3.png')
Error in grid.newpage(): could not open file '/home/joebrew/Desktop/man/3.png'
pd = esp_df %>%
  left_join(esp_pop) %>%
  mutate(p = deaths_non_cum / pop * 1000000)

ggplot(data = pd,
       aes(x = date,
           y = p)) +
  # geom_step() +
  geom_bar(stat = 'identity',
           fill = 'red',
           alpha = 0.6,
           color = NA) +
  # geom_ribbon(aes(x = date, ymin = 0, ymax = p), data = pd, fill = 'blue') +
  facet_wrap(~ccaa) +
  theme_minimal() +
  labs(x = 'Date',
       y = 'Daily deaths per 1,000,000',
       title = 'Daily deaths per 1,000,000 population')

Daily cases Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = cases_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Casos diarios',
       title = 'Casos confirmados nuevos')

Daily deahts in Spain

pd <- df_country %>%
  filter(country == 'Spain')

ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias',
       title = 'Muertes')

Daily cases world / population-adjusted

covid19::plot_day_zero(countries = c('Italy', 'Spain', 'US', 'Germany',
                                     'Canada', 'UK', 'Netherlands'
                                     ),
                       ylog = F,
                       day0 = 1,
                       cumulative = F,
                       calendar = T,
                       pop = T,
                       point_alpha = 0,
                       color_var = 'geo')

Cases per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(cases_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(cases_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(15) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -50,
            color = 'white') +
  labs(x = '',
       y = 'Cases per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 cases per million population, last 7 days')

Deaths per pop last week

pd <- df_country %>%
  left_join(world_pop %>% dplyr::select(iso, pop)) %>%
  group_by(country) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(country) %>%
  summarise(y = sum(deaths_non_cum),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date)),
            yp = sum(deaths_non_cum) / dplyr::first(pop) * 1000000) %>%
  ungroup %>%
  filter(pop > 1000000) %>%
  arrange(desc(yp)) %>%
  head(10) %>%
  mutate(country = ifelse(country == 'United Kingdom', 'UK', country))
pd$country <- factor(pd$country, levels = unique(pd$country))

ggplot(data = pd,
       aes(x = country,
           y = yp)) +
  geom_bar(stat = 'identity') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) +
  geom_text(aes(label = round(yp, digits = 0)),
            nudge_y = -10,
            color = 'white') +
  labs(x = '',
       y = 'Deaths per 1,000,000 (last 7 days)',
       title = 'New confirmed COVID-19 deaths per million population, last 7 days')

Lombardia, Catalonia, Madrid

New cases in last week

pd <- esp_df %>%
  left_join(esp_pop %>% dplyr::select(ccaa, pop)) %>%
  mutate(country = 'Spain') %>%
  bind_rows(
    ita %>% left_join(ita_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Italy')
  ) %>%
  bind_rows(
    df %>% filter(country == 'US') %>% mutate(ccaa = district) %>% left_join(regions_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'US')) %>%
  group_by(ccaa) %>%
  mutate(max_date = max(date)) %>%
  mutate(week_ago = max_date - 6) %>%
  # filter(date == max(date)) %>%
  filter(date >= week_ago, date <= max_date) %>%
  group_by(ccaa) %>%
  summarise(y = sum(cases_non_cum),
            country = dplyr::first(country),
            pop = dplyr::first(pop),
            date_range = paste0(min(date), ' - ', max(date))) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000) %>%
  ungroup %>%
  # filter(pop > 1000000) %>%
  arrange(desc(yp)) 

#Get country totals
pd %>%
  group_by(country) %>%
  summarise(y = sum(y, na.rm = T),
            pop = sum(pop, na.rm = T)) %>%
  ungroup %>%
  mutate(yp = y / pop * 1000000)
# A tibble: 3 x 4
  country      y       pop    yp
  <chr>    <dbl>     <dbl> <dbl>
1 Italy     1547  60491453  25.6
2 Spain     1061  47026208  22.6
3 US      218216 328239523 665. 
library(knitr)
library(kableExtra)
pd <- pd %>%
  mutate(yp = round(yp, digits = 1)) %>%
  mutate(Rank = 1:nrow(pd)) %>%
  dplyr::select(Rank, 
                Región = ccaa,
                `Casos nuevos, 7 días` = y,
                Población = pop,
                `Casos nuevos 7 días por millón` = yp)
kable(pd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(pd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(pd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 Arizona 19253 7278717 2645.1
2 South Carolina 7341 5148714 1425.8
3 Arkansas 3769 3017804 1248.9
4 Florida 26295 21477737 1224.3
5 Utah 3440 3205958 1073.0
6 Texas 30433 28995881 1049.6
7 Alabama 4752 4903185 969.2
8 Mississippi 2783 2976149 935.1
9 North Carolina 9593 10488084 914.7
10 Georgia 9351 10617423 880.7
11 California 33127 39512223 838.4
12 Louisiana 3843 4648794 826.7
13 Tennessee 5121 6829174 749.9
14 Iowa 2141 3155070 678.6
15 Oklahoma 2606 3956971 658.6
16 Nevada 1674 3080156 543.5
17 Nebraska 990 1934408 511.8
18 Delaware 445 973764 457.0
19 Kansas 1286 2913314 441.4
20 New Mexico 925 2096829 441.1
21 Virginia 3739 8535519 438.1
22 Minnesota 2467 5639632 437.4
23 Idaho 770 1787065 430.9
24 South Dakota 369 884659 417.1
25 Washington 3085 7614893 405.1
26 District of Columbia 281 705749 398.2
27 Wisconsin 2307 5822434 396.2
28 Maryland 2368 6045680 391.7
29 Trentino-Alto Adige 419 1070000 391.6
30 Ohio 4337 11689100 371.0
31 Rhode Island 393 1059361 371.0
32 Illinois 4355 12671821 343.7
33 Missouri 2054 6137428 334.7
34 Indiana 2127 6732219 315.9
35 Kentucky 1368 4467673 306.2
36 Oregon 1226 4217737 290.7
37 Wyoming 168 578759 290.3
38 Pennsylvania 3396 12801989 265.3
39 Colorado 1483 5758736 257.5
40 North Dakota 196 762062 257.2
41 New Jersey 2189 8882190 246.4
42 New York 4524 19453561 232.6
43 Massachusetts 1460 6892503 211.8
44 Michigan 2058 9986857 206.1
45 Aragón 196 1319291 148.6
46 West Virginia 255 1792147 142.3
47 Connecticut 484 3565287 135.8
48 Maine 181 1344212 134.7
49 Alaska 94 731545 128.5
50 Montana 136 1068778 127.2
51 New Hampshire 162 1359711 119.1
52 Lombardia 913 10040000 90.9
53 Vermont 54 623989 86.5
54 Navarra 52 654214 79.5
55 Hawaii 91 1415872 64.3
56 Emilia-Romagna 208 4453000 46.7
57 Cataluña 308 7675217 40.1
58 Madrid 223 6663394 33.5
59 Liguria 51 1557000 32.8
60 Piemonte 134 4376000 30.6
61 CyL 67 2399548 27.9
62 Valle d’Aosta 3 126202 23.8
63 Melilla 2 86487 23.1
64 Extremadura 18 1067710 16.9
65 País Vasco 34 2207776 15.4
66 Cantabria 8 581078 13.8
67 Molise 4 308493 13.0
68 Lazio 65 5897000 11.0
69 CLM 20 2032863 9.8
70 Marche 13 1532000 8.5
71 Andalucía 70 8414240 8.3
72 Campania 47 5827000 8.1
73 Toscana 29 3737000 7.8
74 La Rioja 2 316798 6.3
75 Baleares 7 1149460 6.1
76 Murcia 9 1493898 6.0
77 Galicia 14 2699499 5.2
78 Calabria 10 1957000 5.1
79 Veneto 24 4905000 4.9
80 C. Valenciana 24 5003769 4.8
81 Friuli Venezia Giulia 4 1216000 3.3
82 Canarias 7 2153389 3.3
83 Puglia 11 4048000 2.7
84 Abruzzo 3 1315000 2.3
85 Umbria 2 884640 2.3
86 Asturias 0 1022800 0.0
87 Basilicata 0 567118 0.0
88 Ceuta 0 84777 0.0
89 Sardegna -5 1648000 -3.0
90 Sicilia -388 5027000 -77.2
91 American Samoa 0 NA NA
92 Diamond Princess 0 NA NA
93 Grand Princess 0 NA NA
94 Guam 38 NA NA
95 Northern Mariana Islands 0 NA NA
96 Puerto Rico 817 NA NA
97 Recovered 0 NA NA
98 United States Virgin Islands 6 NA NA
99 US 1 NA NA
100 Virgin Islands 3 NA NA
101 Wuhan Evacuee 4 NA NA
102 NA 2 NA NA

Just Italy and Spain

xpd = pd %>% filter(`Región` %in% c(ita$ccaa, esp_df$ccaa))
xpd$Rank <- 1:nrow(xpd)

kable(xpd) %>%
  kable_styling("striped", full_width = F) %>%
  column_spec(which(names(xpd) == 'Casos nuevos 7 días por millón'), bold = T) %>%
  row_spec(which(xpd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
Rank Región Casos nuevos, 7 días Población Casos nuevos 7 días por millón
1 Trentino-Alto Adige 419 1070000 391.6
2 Aragón 196 1319291 148.6
3 Lombardia 913 10040000 90.9
4 Navarra 52 654214 79.5
5 Emilia-Romagna 208 4453000 46.7
6 Cataluña 308 7675217 40.1
7 Madrid 223 6663394 33.5
8 Liguria 51 1557000 32.8
9 Piemonte 134 4376000 30.6
10 CyL 67 2399548 27.9
11 Valle d’Aosta 3 126202 23.8
12 Melilla 2 86487 23.1
13 Extremadura 18 1067710 16.9
14 País Vasco 34 2207776 15.4
15 Cantabria 8 581078 13.8
16 Molise 4 308493 13.0
17 Lazio 65 5897000 11.0
18 CLM 20 2032863 9.8
19 Marche 13 1532000 8.5
20 Andalucía 70 8414240 8.3
21 Campania 47 5827000 8.1
22 Toscana 29 3737000 7.8
23 La Rioja 2 316798 6.3
24 Baleares 7 1149460 6.1
25 Murcia 9 1493898 6.0
26 Galicia 14 2699499 5.2
27 Calabria 10 1957000 5.1
28 Veneto 24 4905000 4.9
29 C. Valenciana 24 5003769 4.8
30 Friuli Venezia Giulia 4 1216000 3.3
31 Canarias 7 2153389 3.3
32 Puglia 11 4048000 2.7
33 Abruzzo 3 1315000 2.3
34 Umbria 2 884640 2.3
35 Asturias 0 1022800 0.0
36 Basilicata 0 567118 0.0
37 Ceuta 0 84777 0.0
38 Sardegna -5 1648000 -3.0
39 Sicilia -388 5027000 -77.2
x = esp_df %>% left_join(esp_pop) %>%
  bind_rows(ita %>% left_join(ita_pop)) %>%
  filter(date == '2020-04-09') %>%
  filter(ccaa %in% c('Madrid',
                     'Cataluña', 'Lombardia')) %>%
  dplyr::select(ccaa, deaths, cases, pop) %>%
  mutate(deathsp = deaths / pop * 100000,
         casesp = cases / pop * 100000)


covid19::plot_day_zero(countries = c('Italy', 'Spain', 'China', 'South Korea', 'Sinagpore'),
                       districts = c('Madrid', #'Hubei',
                     'Cataluña', 'Lombardia'),
                     by_district = T,
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') 

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                     roll = 7,
                     deaths = T,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Morts diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Morts diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

covid19::plot_day_zero(countries = c('Italy', 'Spain'),
                       by_district = T,
                       districts = c('Madrid', 'Emilia-Romagna',
                     'Cataluña', 'Lombardia'),
                     roll = 7,
                     deaths = F,
                     pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
  labs(x = 'Data',
       y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)',
       title = 'Casos diaris per 1.000.000',
       subtitle = 'Mitjana mòbil de 7 dies') +
  theme(legend.direction = 'horizontal',
        legend.position = 'top')

Asia

covid19::plot_day_zero(countries = c('Japan', 'South Korea', 'Singapore', 'Hong Kong'),
                     roll = 7,
                     deaths = F,
                     # pop = T,
                     day0 = 0,
                     ylog = F,
                     calendar = T,
                     cumulative = F) +
    labs(x = 'Data',
       y = 'Casos diaris (mitjana mòbil de 3 dies)',
       title = 'Casos diaris',
       subtitle = 'Mitjana mòbil de 3 dies') +
  facet_wrap(~geo, scales = 'free_y') +
  theme(legend.position = 'none')

Day of week analysis

pd <- esp_df %>%
  arrange(date) %>%
  group_by(date) %>%
  summarise(deaths_non_cum = sum(deaths_non_cum),
            cases_non_cum = sum(cases_non_cum)) %>%
  ungroup %>%  
  mutate(dow = weekdays(date)) %>%
  mutate(week = isoweek(date)) %>%
  group_by(week) %>%
  mutate(start_date = min(date)) %>%
  ungroup %>%
  filter(date >= '2020-03-09')
pd$dow <- factor(pd$dow,
                 levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                            'Saturday', 'Sunday'),
                 labels = c('Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes',
                            'Sábado', 'Domingo'))
n_cols <- length(unique(pd$start_date))
cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))(n_cols)
pd$start_date <- factor(pd$start_date)
ggplot(data = pd,
       aes(x = dow,
           y = cases_non_cum,
           group = week,
           color = start_date)) +
  geom_line(size = 4) +
  geom_point(size = 4) +
  scale_color_manual(name = 'Primer día\nde la semana',
                     values = cols) +
  theme_simple() +
  labs(x = 'Día de la semana',
       y = 'Muertes')

Basque country vs rest of Spain

pd <- esp_df %>%
  mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo, date) %>%
  summarise(deaths = sum(deaths)) %>%
  ungroup
pp <- esp_pop %>%
    mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>%
  group_by(geo) %>%
  summarise(pop = sum(pop))
pd <- left_join(pd, pp) %>% mutate(pk = deaths / pop * 100000)

ggplot(data = pd %>% filter(pk > 0.1),
       aes(x = date,
           y = pk,
           color = geo)) +
  geom_line() +
  labs(x = 'Date',
       y = 'Cumulative deaths per 100,000') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme_simple() 

Country trajectories, population adjusted

countries <- c(
  'Spain',
  'US',
  'France',
  # 'Portugal',
  'Italy',
  'China'
)
districts <- c('Lombardia', 'Cataluña', 
               'New York', 
               # 'Hubei',
               'CyL', 
               'CLM', 
               # 'Washington',
               'La Rioja',
               'Madrid')

plot_day_zero(countries = countries,
              districts = districts,
              ylog = F,
              day0 = 1,
              cumulative = F,
              time_before = 0,
              roll = 3,
              deaths = T,
              pop = T,
              by_district = T,
              point_alpha = 0,
              line_size = 3,
              color_var = 'geo')

Italy and Spain

dir.create('/tmp/totesmou')
plot_day_zero(countries = c('Spain', 'Italy', max_date = Sys.Date()-1),
              point_size = 2, calendar = T)

ggsave('/tmp/totesmou/1_italy_vs_spain.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F)

ggsave('/tmp/totesmou/2_italy_vs_spain_temps_ajustat.png',
       height = 5.6,
       width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = T, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 10, pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              point_size = 2, calendar = F, deaths = T, day0 = 1, pop = T, roll = 3, roll_fun = 'mean')

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              day0 = 10,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy'),
              districts = c('Cataluña', 'Madrid',
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              ylog = F,
              pop = T, calendar = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              roll = 3,
              roll_fun = 'mean',
              day0 = 1,
              pop = T)

plot_day_zero(countries = c('Spain', 'Italy', 'US'),
              districts = c('Cataluña', 'Madrid', 
                            'New York', 
                            'Lombardia', 'Emilia-Romagna'),
              by_district = T,
              deaths = T,
              # roll = 7,
              roll_fun = 'mean',
              day0 = 1,
              pop = F)

plot_day_zero(color_var = 'iso', by_district = T,
              deaths = T,
              day0 = 1,
              alpha = 0.6,
              point_alpha = 0,
              calendar = T,
              countries = c('Spain', 'Italy'),
              pop = T)

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Altres CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Altres regions italianes')
  # )
}
pd <- esp_df %>% mutate(country = 'Espanya') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Itàlia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Altres CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Altres regions italianes'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Itàlia', 'Espanya')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Data',
       y = 'Percentatge de morts',
       title = 'Percentatge de morts: regió més afectada vs resta del pais',
       subtitle = 'A partir del primer día a cada país amb 50 morts acumulades') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x-2,
                y = y,
                label = label,
                color = ccaa),
            size = 7,
            show.legend = FALSE)

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=30])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  # facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Percentage of deaths',
       title = 'Percentage of country\'s cumulative COVID-19 deaths by geography',
       subtitle = 'Starting at first day with 50 or more cumulative deaths') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))# +

  # geom_text(data = label_data,
  #           aes(x = x-2,
  #               y = y,
  #               label = label,
  #               color = ccaa),
  #           size = 7,
  #           show.legend = FALSE)

Same chart as above but absolute numbers

# Same cahrt as previous, but one shared axis
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Rest of Spain')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Rest of Italy')
  # )
}
pd <- esp_df %>% mutate(country = 'Spain') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italy')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = rev(c('Madrid',
                             # 'Cataluña',
                             'Rest of Spain',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Rest of Italy')))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)

label_data <- pd %>%
  filter(country  %in% c('Italy', 'Spain')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  # mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -9)) %>%
  ungroup

# Get moving
ma <- function(x, n = 2){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
}


ggplot(data = pd %>% group_by(country) %>% 
         mutate(start_day = dplyr::first(date[deaths >=1])) %>% 
         filter(date >= start_day) %>% 
         mutate(days_since = as.numeric(date - start_day)) %>%
         ungroup %>% arrange(date) %>%
         group_by(country, ccaa) %>%
         mutate(rolling_average = ma(deaths_non_cum, n = 3)) %>%
         ungroup,
       aes(x = date,
           y = rolling_average,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  # geom_point(size = 3,
  #            aes(color = ccaa)) +
  # scale_y_log10(limits = c(1.5, 1000)) +
  # scale_y_log10() +
  facet_wrap(~country) +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  # geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Date',
       y = 'Deaths (log-scale)',
       title = 'Daily COVID-19 deaths by geography',
       subtitle = '3-day rolling average') +
  theme(legend.position = 'top',
        plot.title = element_text(size = 30),
        plot.subtitle = element_text(size = 24),
        strip.text = element_text(size = 30, hjust = 0.5),
        legend.text = element_text(size = 20))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5))

ggsave('~/Desktop/madlom.png')
roll_curve <- function(data,
                       n = 4,
                       deaths = FALSE,
                       scales = 'fixed',
                       nrow = NULL,
                       ncol = NULL,
                       pop = FALSE){

  # Create the n day rolling avg
  ma <- function(x, n = 5){
    
    if(length(x) >= n){
      stats::filter(x, rep(1 / n, n), sides = 1)
    } else {
      new_n <- length(x)
      stats::filter(x, rep(1 / new_n, new_n), sides = 1)
    }
    
    
  }
  
  pd <- data
  if(deaths){
    pd$var <- pd$deaths_non_cum
  } else {
    pd$var <- pd$cases_non_cum
  }
  
  if('ccaa' %in% names(pd)){
    pd$geo <- pd$ccaa
  } else {
    pd$geo <- pd$iso
  }
  
  if(pop){
    # try to get population
    if(any(pd$geo %in% unique(esp_df$ccaa))){
      right <- esp_pop
      right_var <- 'ccaa'
    } else {
      right <- world_pop
      right_var <- 'iso'
    }
    pd <- pd %>% left_join(right %>% dplyr::select(all_of(right_var), pop),
                           by = c('geo' = right_var)) %>%
      mutate(var = var / pop * 100000)
  }
  
  pd <- pd %>%
    arrange(date) %>%
    group_by(geo) %>%
    mutate(with_lag = ma(var, n = n))
  
  
  ggplot() +
    geom_bar(data = pd,
         aes(x = date,
             y = var),
             stat = 'identity',
         fill = 'grey',
         alpha = 0.8) +
    geom_area(data = pd,
              aes(x = date,
                  y = with_lag),
              color = 'red',
              fill = 'red',
              alpha = 0.6) +
    facet_wrap(~geo, scales = scales, nrow = nrow, ncol = ncol) +
    theme_simple() +
    labs(x = '',
         y = ifelse(deaths, 'Deaths', 'Cases'),
         title = paste0('Daily (non-cumulative) ', ifelse(deaths, 'deaths', 'cases'),
                        ifelse(pop, ' (per 100,000)', '')),
         subtitle = paste0('Data as of ', max(data$date),
                           '.\nRed line = ', n, ' day rolling average. Grey bar = day-specific value.')) +
    theme(axis.text.x = element_text(size = 7,
                                     angle = 90,
                                     hjust = 0.5, vjust = 1)) #+
    # scale_x_date(name ='',
    #              breaks = sort(unique(pd$date)),
    #              labels = format(sort(unique(pd$date)), '%b %d'))
    # scale_y_log10()
}
this_ccaa <- 'Cataluña'
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))

plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))

african_countries <-  world_pop$country[world_pop$sub_region %in% c('Sub-Saharan Africa')]

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    max_date = Sys.Date() - 46,
                    ylog = F) +
  ylim(0, 5000)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    max_date = Sys.Date(),
                    ylog = F) +
  ylim(0, 5000)  
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 1,
                    calendar = T,
                    max_date = Sys.Date(),
                    ylog = T) + theme(legend.position = 'none')
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(african_countries),
                    day0 = 10,
                    max_date = Sys.Date(),
                    ylog = F)
pd

latam_countries <-  world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean')]
latam_countries <- latam_countries[!latam_countries %in% c('Guyana')]

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 13)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date() - 6)
pd

pd <- plot_day_zero(countries = c(latam_countries),
                    day0 = 10,
                    max_date = Sys.Date())
pd

Latin America and Africa vs Europe

isos <- sort(unique(world_pop$sub_region))
keep_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean', 'Sub-Saharan Africa') |
                                      world_pop$region %in% 'Europe']
keep_countries <- keep_countries[!keep_countries %in% c('Guyana')]
pd <- df_country %>% ungroup %>%
  filter(country %in% keep_countries) %>%
  dplyr::select(-country) %>%
  left_join(world_pop) %>%
  group_by(iso) %>%
  mutate(day0 = min(date[cases >= 10])) %>%
  ungroup %>%
  mutate(days_since = date - day0) %>%
  filter(days_since >= 0)


cols <- c( 'black')
g <- ggplot(data = pd,
       aes(x = days_since,
           y = cases,
           group = country,
           color = region)) +
  geom_line(data = pd %>% filter(region == 'Europe'),
            alpha = 0.6) +
  scale_y_log10() +
  scale_color_manual(name = '', values = cols) +
  theme_simple() +
  labs(x = 'Days since first day at 10 cases') +
  theme(legend.position = 'top')
g

cols <- c('darkred', 'black')

g + 
    geom_line(data = pd %>% filter(region == 'Africa'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_y_log10() +
    scale_color_manual(name = '', values = cols) 

cols <- c( 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region == 'Latin America and the Caribbean'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

cols <- c('darkred', 'darkorange', 'black')
g + 
    geom_line(data = pd %>% filter(sub_region != 'Europe'),
            # alpha = 1,
            size = 1.5,
            alpha = 0.8) +
    scale_color_manual(name = '', values = cols) 

# Assets
pyramid_dir <- '../../data-raw/pyramids/'
pyramid_files <- dir(pyramid_dir)
out_list <- list()
for(i in 1:length(pyramid_files)){
  out_list[[i]] <- read_csv(paste0(pyramid_dir, pyramid_files[i])) %>%
    mutate(region = gsub('.csv', '', pyramid_files[i]))
}
pyramid <- bind_rows(out_list)
make_pyramid <- function(the_region = 'AFRICA-2019'){
  sub_data <- pyramid %>% filter(region == the_region)
  sub_data$Age <- factor(sub_data$Age, levels = sub_data$Age)
  sub_data <- tidyr::gather(sub_data, key, value, M:F)
  ggplot(data = sub_data,
         aes(x = Age,
             y = value,
             fill = key)) +
    geom_bar(stat = 'identity',
             position = position_dodge(),
             alpha = 0.6,
             color = 'black') +
    scale_fill_manual(name = '', values = c('darkorange', 'lightblue')) +
    theme_simple() +
    labs(x = 'Age group',
         y = 'Population') +
    theme(legend.position = 'top')
}
make_pyramid_overlay <- function(){
  sub_data <- pyramid %>% filter(region %in% c('EUROPE-2019',
                                               'AFRICA-2019',
                                               'LATIN AMERICA AND THE CARIBBEAN-2019')) %>%
    mutate(region = gsub('-2019', '', region))
   sub_data$Age <- factor(sub_data$Age, levels = unique(sub_data$Age))
  sub_data <- tidyr::gather(sub_data, key, value, M:F) %>%
    group_by(Age, region) %>%
    summarise(value = sum(value)) %>%
    ungroup %>%
    group_by(region) %>%
    mutate(p = value / sum(value) * 100) %>%
    ungroup
  ggplot(data = sub_data,
         aes(x = Age,
             y = p,
             color = region,
             group = region,
             fill = region)) +
    geom_area(position = position_dodge(),
              alpha = 0.6) +
    scale_fill_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    scale_color_manual(name = '',
                      values = c('darkred', 'darkorange', 'black')) +
    theme_simple() +
    theme(legend.position = 'top') +
    labs(x = 'Age group', y = 'Percentage')
}

make_pyramid(the_region = 'Spain-2019') + labs(title = 'Spain')
make_pyramid(the_region = 'Italy-2019') + labs(title = 'Italy')

make_pyramid(the_region = 'EUROPE-2019') + labs(title = 'Europe')
make_pyramid(the_region = 'Kenya-2019') + labs(title = 'Kenya')
make_pyramid(the_region = 'Mozambique-2019') + labs(title = 'Mozambique')
make_pyramid(the_region = 'Tanzania-2019') + labs(title = 'Tanzania')

make_pyramid(the_region = 'Guatemala-2019') + labs(title = 'Guatemala')

make_pyramid(the_region = 'AFRICA-2019') + labs(title = 'Africa')

make_pyramid(the_region = 'LATIN AMERICA AND THE CARIBBEAN-2019') + labs(title = 'Latin America and the Caribbean')

make_pyramid_overlay() + labs(title = 'Population distribution by region')

pyramid <- pyramid %>%
  mutate(old = Age %in% c('80-84', '85-89', '90-94','95-99', '100+'))

pyramid %>%
  group_by(region, old) %>%
  summarise(n = sum(M) + sum(F)) %>%
  ungroup %>%
  group_by(region) %>%
  mutate(p = n / sum(n) * 100) %>%
  filter(old)
plot_day_zero(countries = c('South Africa', 'Spain'), day0 = 1, calendar = T)

plot_day_zero(countries = c('Kenya', 'Italy'), day0 = 10, calendar = F)

Pics

plot_day_zero(countries = c('China', 'Italy', 'Spain'),
              districts = c('Hubei', 'Lombardia', 'Cataluña', 'Madrid'),
              by_district = T,
              point_alpha = 0,
              day0 = 5,
              pop = F,
              deaths = T,
              ylog = T,
              calendar = F,
              roll = 5)

Map of portugal, france, spain

# cat_transform <- function(x){ifelse(x == 'Catalunya', 'Cataluña', x)}
cat_transform <- function(x){return(x)}
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
keep_date = pd %>% group_by(country) %>% summarise(max_date= max(date)) %>% summarise(x = min(max_date)) %>% .$x
pd <- pd %>%
  mutate(ccaa = cat_transform(ccaa)) %>%
  group_by(ccaa) %>%
  filter(date == keep_date) %>%
  # filter(date == '2020-03-27') %>%
  ungroup %>%
  dplyr::select(date, ccaa, deaths, deaths_non_cum,
                cases, cases_non_cum) %>%
  left_join(regions_pop %>%
              bind_rows(
                world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                  dplyr::select(-region, -sub_region)
              )) %>%
  mutate(cases_per_million = cases / pop * 1000000,
         deaths_per_million = deaths / pop * 1000000) %>%
  mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
         deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)

map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
centroids <- left_join(centroids, pd)
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              centroids$x < 8 & (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <77,]

# map_sp <- map <-  map[centroids$x > -10,]

# fortify
map <- fortify(map, region = 'ccaa')

# join data
map$ccaa <- map$id
map <- left_join(map, pd)
var <- 'deaths_per_million'
map$var <- as.numeric(unlist(map[,var]))
centroids <- centroids[,c('ccaa', 'x', 'y', var)]
centroids <- centroids %>%
  filter(ccaa %in% map_sp@data$ccaa)

# cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
# cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
# cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
# cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'lightblue'))(10))
g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 # color = 'darkgrey',
                 color = NA,
                 size = 0.6) +
      scale_fill_gradientn(name = '',
                           colours = cols) +
  # scale_fill_() +
  ggthemes::theme_map() +
  theme(legend.position = 'bottom',
        plot.title = element_text(size = 16)) +
  guides(fill = guide_colorbar(direction= 'horizontal',
                               barwidth = 50,
                               barheight = 1,
                               label.position = 'bottom')) +
  labs(title = 'Cumulative COVID-19 deaths per million population, western Mediterranean',
       subtitle = paste0('Data as of ', format(max(pd$date), '%B %d, %Y'))) +
  geom_text(data = centroids,
            aes(x = x,
                y = y,
                group = NA,
                label = paste0(ccaa, '\n',
                               round(deaths_per_million, digits = 2))),
            alpha = 0.8,
            size = 3)
g

ggsave('/tmp/map_with_borders.png',
       height = 8, width = 13)

Animation, Portugal, France, Spain, Italy

dir.create('/tmp/animation_map/')
pd <- por_df %>% mutate(country = 'Portugal') %>%
  bind_rows(esp_df %>% mutate(country = 'Spain')) %>%
  bind_rows(fra_df %>% mutate(country = 'France')) %>%
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(
    df %>% filter(country == 'Andorra') %>%
      mutate(ccaa = 'Andorra')
  )
pd %>% group_by(country) %>% summarise(max_date= max(date))
# A tibble: 5 x 2
  country  max_date  
  <chr>    <date>    
1 Andorra  2020-06-24
2 France   2020-05-06
3 Italy    2020-06-25
4 Portugal 2020-06-25
5 Spain    2020-06-21
unique_dates <- sort(unique(pd$date))
unique_dates <- unique_dates[unique_dates >= '2020-03-01']
popper <- regions_pop %>%
                bind_rows(
                  world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>%
                    dplyr::select(-region, -sub_region)
                )


map_esp1 <- map_esp
map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa)
map_fra1 <- map_fra
map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1)
map_por1 <- map_por
map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR)
map_ita1 <- map_ita 
map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa)
map_and1 <- map_and
map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0)


map <- 
  rbind(map_esp1,
        map_por1,
        map_fra1,
        map_ita1,
        map_and1)

# Remove areas not of interest
centroids <- coordinates(map)
centroids <- data.frame(centroids)
names(centroids) <- c('x', 'y')
centroids$ccaa <- map@data$ccaa
# map <- map_sp <- map[centroids$y >35 & centroids$x > -10 &
#              # centroids$x < 8 &
#                (centroids$y < 43  | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') |
#                                   map@data$ccaa %in% esp_df$ccaa),]
# map_sp <- map <-  map[centroids$x > -10 & centroids$y <47,]
map_sp <- map <-  map[centroids$x > -10 & centroids$y <477,]


# fortify
map <- fortify(map, region = 'ccaa')



for(i in 1:length(unique_dates)){
  this_date <- unique_dates[i]
    today_map <- map
    today_centroids <- centroids
    today_pd <- pd

  today_pd <- today_pd %>%
      mutate(ccaa = cat_transform(ccaa)) %>%
    group_by(ccaa) %>%
    # filter(date == max(date)) %>%
    filter(date == this_date) %>%
    ungroup %>%
    dplyr::select(date, ccaa, deaths, deaths_non_cum,
                  cases, cases_non_cum) %>%
    left_join(popper) %>%
    mutate(cases_per_million = cases / pop * 1000000,
           deaths_per_million = deaths / pop * 1000000) %>%
    mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000,
           deaths_per_million_non_cum = deaths_non_cum / pop * 1000000)
  
  today_centroids <- left_join(today_centroids, today_pd)

  
  # join data
  today_map$ccaa <- today_map$id
  today_map <- left_join(today_map, today_pd)
  var <- 'deaths_per_million'
  today_map$var <- as.numeric(unlist(today_map[,var]))
  today_map$var <- ifelse(is.na(today_map$var), 0, today_map$var)
  today_centroids <- today_centroids[,c('ccaa', 'x', 'y', var)]
  today_centroids <- today_centroids %>%
    filter(ccaa %in% today_map$ccaa)
  today_centroids$var <- today_centroids[,var]
  today_centroids$var <- ifelse(is.na(today_centroids$var), 0, today_centroids$var)

  
  # cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))
  # cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1')
  # cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e')
  # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
  cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'white'))(17))
  g <- ggplot(data = today_map,
           aes(x = long,
               y = lat,
               group = group)) +
      geom_polygon(aes(fill = var),
                   lwd = 0.3,
                   # color = 'darkgrey',
                   color = NA,
                   size = 0.6) +
        scale_fill_gradientn(name = '',
                             colours = cols,
                             breaks = seq(0, 1100, 50),
                             limits = c(0, 1100)) +
    # scale_fill_() +
    ggthemes::theme_map() +
    theme(legend.position = 'right',
          plot.title = element_text(size = 24)) +
    guides(fill = guide_colorbar(direction= 'vertical',
                                 barwidth = 1,
                                 barheight = 30,
                                 label.position = 'left')) +
    labs(subtitle = 'Cumulative COVID-19 deaths per million population',
         title = paste0(format(this_date, '%B %d, %Y'))) +
    geom_text(data = today_centroids,
              aes(x = x,
                  y = y,
                  group = NA,
                  label = paste0(ccaa, '\n',
                                 round(var, digits = 2))),
              alpha = 0.8,
              size = 1.5)
  
  ggsave(paste0('/tmp/animation_map/', this_date, '.png'),
         plot = g,
         height = 6, width = 9)
}
# Command line
cd /tmp/animation_map
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif

Deaths overall over time Spain

df_country %>% filter(country == 'Spain') %>% arrange(date) %>% tail
# A tibble: 6 x 10
# Groups:   country [1]
  country date        cases deaths   uci hospitalizations cases_non_cum
  <chr>   <date>      <dbl>  <dbl> <dbl>            <int>         <dbl>
1 Spain   2020-06-16 268348      0     0                0           208
2 Spain   2020-06-17 268646      0     0                0           298
3 Spain   2020-06-18 268856      0     0                0           210
4 Spain   2020-06-19 268922      0     0                0            66
5 Spain   2020-06-20 268976      0     0                0            54
6 Spain   2020-06-21 269010      0     0                0            34
# … with 3 more variables: deaths_non_cum <dbl>, uci_non_cum <dbl>, iso <chr>

Deaths yesterday

pd <- df_country
pd$value <- pd$deaths_non_cum
the_date <- Sys.Date() - 1
pd <- pd %>%
  filter(date == the_date) %>%
  dplyr::select(country, iso, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(world_pop %>% dplyr::select(-country)) %>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$country <- factor(pd$country, levels = pd$country)
ggplot(data = pd,
       aes(x = country,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white') +
  labs(title = paste0('Confirmed COVID-19 deaths on ', the_date),
       x = '', y = '')

pd
# A tibble: 10 x 10
# Groups:   country [10]
   country iso    cases cases_non_cum deaths value    pop region sub_region
   <fct>   <chr>  <dbl>         <dbl>  <dbl> <dbl>  <dbl> <chr>  <chr>     
 1 Brazil  BRA   1.19e6         42725  53830  1185 2.09e8 Ameri… Latin Ame…
 2 Mexico  MEX   1.97e5          5437  24324   947 1.26e8 Ameri… Latin Ame…
 3 US      USA   2.38e6         34471 121980   752 3.27e8 Ameri… Northern …
 4 India   IND   4.73e5         16922  14894   418 1.35e9 Asia   Southern …
 5 Chile   CHL   2.54e5          3649   4731   226 1.87e7 Ameri… Latin Ame…
 6 Peru    PER   2.65e5          3879   8586   182 3.20e7 Ameri… Latin Ame…
 7 Russia  RUS   6.06e5          7165   8503   154 1.44e8 Europe Eastern E…
 8 United… GBR   3.08e5           655  43165   154 6.65e7 Europe Northern …
 9 Pakist… PAK   1.93e5          4044   3903   148 2.12e8 Asia   Southern …
10 Iran    IRN   2.13e5          2531   9996   133 8.18e7 Asia   Southern …
# … with 1 more variable: value_per_million <dbl>

Deaths per million yesterday per CCAA

pd <- esp_df
pd$value <- pd$deaths_non_cum
the_date <- max(pd$date)
pd <- pd %>%
  filter(date == max(date)) %>%
  dplyr::select(ccaa, cases, cases_non_cum,
                deaths, value) %>%
  dplyr::arrange(desc(value)) %>%
  left_join(esp_pop)%>%
  mutate(value_per_million = value / pop * 1000000) #%>% 
  # arrange(desc(value_per_million))
pd <- pd[1:10,]
pd$ccaa <- factor(pd$ccaa, levels = pd$ccaa)
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  geom_text(aes(label = value),
            nudge_y = -20,
            size = 4,
            color = 'white')

pd
# A tibble: 10 x 7
   ccaa          cases cases_non_cum deaths value     pop value_per_million
   <fct>         <dbl>         <dbl>  <dbl> <dbl>   <dbl>             <dbl>
 1 Andalucía     16590             5      0     0 8414240                 0
 2 Aragón         6696             2      0     0 1319291                 0
 3 Asturias       2435             0      0     0 1022800                 0
 4 Baleares       2316             0      0     0 1149460                 0
 5 C. Valenciana 14782             2      0     0 5003769                 0
 6 Canarias       2494             0      0     0 2153389                 0
 7 Cantabria      2341             0      0     0  581078                 0
 8 Cataluña      55743            11      0     0 7675217                 0
 9 Ceuta           222             0      0     0   84777                 0
10 CLM           22122             1      0     0 2032863                 0

Deaths yesterday animation

dir.create('/tmp/animation_deaths')
dates <- seq(as.Date('2020-03-17'), (Sys.Date()-1), by = 1)
for(i in 1:length(dates)){
  this_date <- dates[i]
  pd <- df_country
  pd$value <- pd$deaths_non_cum
  pd <- pd %>%
    filter(date == max(this_date)) %>%
    dplyr::select(country, cases, cases_non_cum,
                  deaths, value) %>%
    dplyr::arrange(desc(value))
  pd <- pd[1:10,]
  pd <- pd %>% filter(value > 0)
  pd$country <- gsub(' ', '\n', pd$country)
  pd$country <- factor(pd$country, levels = pd$country)
  pd$color_var <- pd$country == 'Spain'
  if('Spain' %in% pd$country){
    cols <- rev(c('darkred', 'black'))
  } else {
    cols <- 'black'
  }
  g <- ggplot(data = pd,
         aes(x = country,
             y = value)) +
    geom_bar(stat = 'identity',
             aes(fill = color_var),
             alpha = 0.8,
             show.legend = FALSE) +
    theme_simple() +
    geom_text(aes(label = value),
              nudge_y = max(pd$value) * .05,
              size = 5,
              color = 'black') +
    labs(title = 'Daily (non-cumulative) COVID-19 deaths',
         subtitle = format(this_date, '%B %d')) +
    labs(x = 'Country',
         y = 'Deaths') +
    theme(axis.text = element_text(size = 15),
          plot.subtitle = element_text(size = 20)) +
    scale_fill_manual(name ='',
                      values = cols) +
    ylim(0, 900)
  ggsave(filename = paste0('/tmp/animation_deaths/', this_date, '.png'),
         g)
}
# Command line
cd /tmp/animation_deaths
mogrify -resize 50% *.png
convert -delay 50 -loop 0 *.png result.gif

Heatmap

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>% 
  bind_rows(ita %>% mutate(country = 'Italy')) %>%
  bind_rows(por_df %>% mutate(country = 'Portugal')) %>%
  bind_rows(fra_df %>% mutate(country = 'France'))
pd$value <- pd$deaths_non_cum
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 600)
the_breaks <- c(1, seq(100, 600, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  # geom_tile() +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
    # scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
    #                     name = '',
    #                     limits = the_limits,
    #                     breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  
  theme_simple() +
  facet_wrap(~country, scales = 'free_y') +
  theme(strip.text = element_text(size = 20),
        axis.title = element_blank(),
        axis.text = element_text(size = 10),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily (non-cumulative) COVID-19 deaths by sub-state regions',
       caption = paste0('Data as of ', max_date))

ggsave('/tmp/1.png',
       width = 10,
       height = 8)

Heatmap per population

pd <- by_country <-  esp_df %>% mutate(country = 'Spain') %>%  bind_rows(ita %>% mutate(country = 'Italy'))
poppy <- bind_rows(ita_pop, esp_pop)
pd <- pd %>% left_join(poppy)
pd$value <- pd$deaths_non_cum / pd$pop * 1000000
max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d
# pd$value <- ifelse(is.na(pd$value), 0, pd$value)
left <- expand.grid(date = seq(min(pd$date),
                               max(pd$date),
                               by = 1),
                    ccaa = sort(unique(pd$ccaa)))
right <- pd %>% dplyr::select(date, ccaa, value)
pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value))
pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>%
  filter(date <= max_date) %>%
  filter(value > 0)
the_limits <- c(1, 60)
the_breaks <- c(1, seq(10, 60, length = 6)) #seq(0, 600, length = 7)
pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa))))
ggplot(data = pd,
       aes(x = date,
           y = ccaa,
           color = value,
           size = value)) +
  # geom_tile(color = 'white') +
  geom_point(alpha = 0.8) +
  scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)),
                        name = '',
                        limits = the_limits,
                        breaks = the_breaks) +
  scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) +
  theme_simple() +
  facet_wrap(~country, scales = 'free') +
  theme(strip.text = element_text(size = 26),
        axis.title = element_blank(),
        axis.text = element_text(size = 16),
        axis.text.x = element_text(size = 12)) +
  guides(color = guide_legend(), size = guide_legend()) +
  labs(title = 'Daily COVID-19 deaths per 1,000,000 population by sub-state regions',
       caption = paste0('Data as of ', max_date))

ggsave('/tmp/2.png',
       width = 10,
       height = 8)

Madrid vs rest of state

place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid',
                                      # ifelse(x == 'Cataluña', 'Cataluña',
                                             'Otras CCAA')
  # )
}
place_transform_ita <- function(x){
  ifelse(x == 'Lombardia', 'Lombardia', 
         # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 
                'Otras regiones italianas')
  # )
}
pd <- esp_df %>% mutate(country = 'España') %>%
  mutate(ccaa = place_transform(ccaa)) %>%
  bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa),
                           country = 'Italia')) %>%
  group_by(country, ccaa, date) %>% 
  summarise(cases = sum(cases),
            uci = sum(uci),
            deaths = sum(deaths),
            cases_non_cum = sum(cases_non_cum),
            deaths_non_cum = sum(deaths_non_cum),
            uci_non_cum = sum(uci_non_cum)) %>%
  left_join(esp_pop %>%
              mutate(ccaa = place_transform(ccaa)) %>%
              bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>%
              group_by(ccaa) %>%
              summarise(pop = sum(pop))) %>%
  mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>%
  group_by(country, date) %>%
  mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100,
         p_deaths_country = deaths / sum(deaths) * 100)
pd$ccaa <- factor(pd$ccaa,
                  levels = c('Madrid',
                             # 'Cataluña',
                             'Otras CCAA',
                             'Lombardia',
                             # 'Emilia Romagna',
                             'Otras regiones italianas'))
cols <- c(
  rev(brewer.pal(n = 3, 'Reds'))[1:2],
  rev(brewer.pal(n = 3, 'Blues'))[1:2]
)
ggplot(data = pd,
       aes(x = date,
           y = deaths_non_cum_p,
           fill = ccaa,
           group = ccaa)) +
  geom_bar(stat = 'identity',
           position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  # geom_line(size = 0.2,
  #           aes(color = ccaa)) +
  xlim(as.Date('2020-03-09'),
       Sys.Date()-1) +
  theme_simple() +
  labs(x = 'Fecha',
       y = 'Muertes diarias por 1.000.000',
       title = 'Muertes por 1.000.000 habitantes') +
  theme(legend.position = 'top') +
  geom_text(aes(label = round(deaths_non_cum_p, digits = 1),
                color = ccaa,
                y = deaths_non_cum_p + 2,
                group = ccaa),
            size = 2.5,
            angle = 90,
            position = position_dodge(width = 0.99))

label_data <- pd %>%
  filter(country  %in% c('Italia', 'España')) %>%
  group_by(country) %>%
  filter(date == max(date))  %>%
  mutate(label = gsub('\nitalianas', '',  gsub(' ', '\n', ccaa))) %>%
  mutate(x = date - 2,
         y = p_deaths_country + 
           ifelse(p_deaths_country > 50, 10, -10))
ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day),
       aes(x = date,
           y = p_deaths_country,
           color = ccaa,
           group = ccaa)) +
  # geom_bar(stat = 'identity',
  #          position = position_dodge(width = 0.99)) +
  scale_fill_manual(name = '', values = cols) +
  scale_color_manual(name = '', values = cols) +
  geom_line(size = 2,
            aes(color = ccaa)) +
  geom_point(size = 3,
             aes(color = ccaa)) +
  facet_wrap(~country, scales = 'free_x') +
  # xlim(as.Date('2020-03-09'),
  #      Sys.Date()-1) +
  theme_simple() +
  geom_hline(yintercept = 50, lty = 2, alpha = 0.6) +
  # geom_line(lty = 2, alpha = 0.6) +
  labs(x = 'Fecha',
       y = 'Porcentaje de muertes',
       title = 'Porcentaje de muertes del país: región más afectada vs. resto del país',
       subtitle = 'A partir del primer día en cada país con 50 o más muertes') +
  theme(legend.position = 'top',
        strip.text = element_text(size = 30),
        legend.text = element_text(size = 10))  +
  guides(color = guide_legend(nrow = 2,
                              keywidth = 5)) +
  geom_text(data = label_data,
            aes(x = x - 2,
                y = y,
                label = label,
                color = ccaa),
            size = 6,
            show.legend = FALSE)

Italy regions, Spanish regions, Chinese regions (adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'Hubei (China)',
                          ifelse(country == 'Italy', 'Italia', 'España')))

lombardy_location <- (max(pd_big$days_since_start_deaths_pm[pd_big$ccaa == 'Lombardia']))
Error in eval(expr, envir, enclos): object 'pd_big' not found
# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'Hubei (China)' & days_since_start_deaths_pm == 22) |
                                                            (date == max(date) & country == 'España' & deaths_pm > 40 & days_since_start_deaths_pm >= 7) & ccaa != 'CyL' |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths_pm > 15)
                                                          ))
# Get differential label data based on what to be emphasized
bigs <- c('Madrid', 'Lombardia', 'Hubei')
label_data_big <- label_data %>%
  filter(ccaa %in% bigs)
label_data_small <- label_data %>%
  filter(!ccaa %in% bigs)

pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c('darkred', '#FF6633', '#006666')

ggplot(data = pd_big,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths_pm,
                y = deaths_pm,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población\nLíneas rojas: CCAA; líneas verde-azules: regiones italianas; línea naranja: Hubei, China'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = (deaths_pm + 50),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm  + (log(deaths_pm)/2),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 25))  +
  xlim(0, lombardy_location + 5)
Error in limits(c(...), "x"): object 'lombardy_location' not found

Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

# Define label data
label_data <- pd %>% group_by(ccaa) %>% filter(
                                                          (
                                                            (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
                                                            (date == max(date) & country == 'España' & deaths > 90) |
                                                              (date == max(date) & country == 'Italia' &
                                                                 ccaa != 'Liguria' & days_since_start_deaths > 10)
                                                          ))
# Get differential label data based on what to be emphasized
label_data_big <- label_data %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
label_data_small <- label_data %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))

# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')
ggplot(data = pd_big,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c(cols)) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1.6,
                y = ifelse(ccaa == 'Hubei', (deaths -500),
                           ifelse(ccaa == 'Lombardia',  (deaths + 700),
                                   (deaths + 300))),
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                align = 'left',
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 30))  +
  xlim(0, 50)

ANIMATION: Italy regions, Spanish regions, Chinese regions (raw numbers, not adjusted for population)

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# Chinese data
d <- df %>% filter(country == 'China') %>%
  mutate(cases = cases) %>%
  mutate(ccaa = district) %>%
  mutate(country = 'China') %>%
  left_join(chi_pop)
# join
joined <- bind_rows(a, b, d)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))


add_zero <- function(x, n){
  x <- as.character(x)
  adders <- n - nchar(x)
  adders <- ifelse(adders < 0, 0, adders)
  for (i in 1:length(x)){
    if(!is.na(x[i])){
      x[i] <- paste0(
        paste0(rep('0', adders[i]), collapse = ''),
        x[i],
        collapse = '')  
    } 
  }
  return(x)
}
# # Define label data
# label_data <- pd %>% group_by(ccaa) %>% filter(
#                                                           (
#                                                             (country == 'China' & deaths >10 & days_since_start_deaths == 29) |
#                                                             (date == max(date) & country == 'España' & deaths > 90) |
#                                                               (date == max(date) & country == 'Italia' &
#                                                                  ccaa != 'Liguria' & days_since_start_deaths > 10)
#                                                           ))
# # Get differential label data based on what to be emphasized
# label_data_big <- label_data %>%
#   filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# label_data_small <- label_data %>%
#   filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
# 
pd_big <- pd %>%
  filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))
pd_small <- pd %>%
  filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei'))



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- c( '#FF6633',  'darkred', '#006666')

the_dir <- '/tmp/animation/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% c('Lombardia', 'Madrid', 'Hubei')) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup %>%
    mutate(days_since_start_deaths = ifelse(ccaa == 'Hubei' &
                                              days_since_start_deaths >32,
                                            32,
                                            days_since_start_deaths))
  
  label_data_small <-
    pd_small_sub %>%
    filter(ccaa %in% c('Emilia Romagna',
                       'Cataluña',
                       'CLM',
                       'País Vasco',
                       'Veneto',
                       'Piemonte',
                       'Henan',
                       'Heilongjiang')) %>%
    group_by(ccaa) %>%
    filter(date == max(date))

  n_countries <- length(unique(pd_big_sub$country))
  if(n_countries == 3){
    sub_cols  <- cols
  }
  if(n_countries == 2){
    sub_cols <- cols[c(1,3)]
  }
   if(n_countries == 1){
    sub_cols <- cols[1]
  }
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = country),
               size = 1, alpha = 0.6) +
    scale_y_log10(limits = c(5, 4500)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 1,
                y = deaths,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 1.6,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 38) 
  message(i)
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 10.5)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 20 -loop 0 *.png result.gif

ANIMATION: Spain only

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
joined <- a
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

# Define plot data
pd <- joined %>% filter(days_since_start_deaths >= 0) %>%
  mutate(country = ifelse(country == 'China',
                          'China',
                          ifelse(country == 'Italy', 'Italia', 'España')))

bigs <- c('Madrid', 'Cataluña', 'CLM', 'CyL', 'País Vasco', 'La Rioja')
pd_big <- pd %>%
  filter(ccaa %in% bigs)
pd_small <- pd %>%
  filter(!ccaa %in% bigs)



# cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country)))
# cols <- rainbow(3)
cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$country)))

the_dir <- '/tmp/animation2/'
dir.create(the_dir)
the_dates <- sort(unique(c(pd_big$date, pd_small$date)))
for(i in 1:length(the_dates)){
  
  the_date <- the_dates[i]
  pd_big_sub <- pd_big %>% filter(date <= the_date)
  pd_big_current <- pd_big_sub %>% filter(date == the_date)
  pd_small_sub <- pd_small %>% filter(date <= the_date)
  pd_small_current <- pd_small_sub %>% filter(date == the_date)

  label_data_big <-
    pd_big_sub %>%
    filter(ccaa %in% bigs) %>%
    group_by(ccaa) %>%
    filter(date == max(date)) %>%
    ungroup
  
  label_data_small <-
    pd_small_sub %>%
    group_by(ccaa) %>%
    filter(date == max(date))
# sub_cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$ccaa)))
  sub_cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Dark2'))(length(unique(pd$ccaa)))
  # sub_cols <- rainbow((length(unique(pd$ccaa))))
  
  g <- ggplot(data = pd_big_sub,
       aes(x = days_since_start_deaths,
           y = deaths,
           group = ccaa)) +
  geom_line(aes(color = ccaa),
            alpha = 0.9,
            size = 2) +
  geom_line(data = pd_small_sub,
            aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
            alpha = 0.7,
            size = 1) +
    geom_point(data = pd_big_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 3) +
    geom_point(data = pd_small_current,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.6) +
    geom_point(data = pd,
               aes(x = days_since_start_deaths,
                y = deaths,
                color = ccaa),
               size = 1, alpha = 0.01) +
    scale_y_log10(limits = c(5, max(pd$deaths)*1.2),
                  breaks = c(10, 50, 100, 500, 1000)) +
  scale_color_manual(name = '',
                     values = sub_cols) +
  theme_simple() +
  theme(legend.position = 'top') +
  labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas',
       y = 'Muertes',
       title = format(the_date, '%d %b'),
       subtitle = 'Muertes por COVID-19',
       caption = '@joethebrew | www.databrew.cc')   +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        plot.title = element_text(size = 35),
        plot.subtitle = element_text(size = 24))  +
  xlim(0, 20) +
    theme(legend.position = 'none')
  message(i)
  if(nrow(label_data_big) > 0){
    g <- g +
      geom_text(data = label_data_big,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths,
                hjust = 0,
                label = gsub(' ', ' ', ccaa),
                color = ccaa),
            size = 8,
            alpha = 0.9,
            show.legend = FALSE) +
    geom_text(data = label_data_small,
            aes(x = days_since_start_deaths + 0.2,
                y = deaths ,
                label = ccaa,
                # label = gsub(' ', '\n', ccaa),
                color = ccaa),
            size = 5,
            alpha = 0.6,
            show.legend = FALSE)
  }
  
  ggsave(paste0(the_dir, add_zero(i, 3), '.png'),
         height = 7,
         width = 12)
}
# Command line
cd /tmp/animation
mogrify -resize 50% *.png
convert -delay 25 -loop 0 *.png result.gif

Italy regions for Spanish regions

# Spanish data
a <- esp_df %>%
  left_join(esp_pop) %>%
  mutate(country = 'Spain')
# Italian data
b <- ita %>%
  left_join(ita_pop) %>%
  mutate(country = 'Italy')
# join
joined <- bind_rows(a, b)
# Get deaths per milllion
joined$deaths_pm <- joined$deaths / joined$pop * 1000000
joined$cases_pm <- joined$cases / joined$pop * 1000000

# Get the days since paradigm
x_deaths <- 5
x_deaths_pm <- 5
x_cases <- 50
x_cases_pm <- 50
joined <- joined %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(start_deaths = min(date[deaths >= x_deaths]),
         start_cases = min(date[cases >= x_cases]),
         start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]),
         start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>%
  ungroup %>%
  mutate(days_since_start_deaths = date - start_deaths,
         days_since_start_cases = date - start_cases,
         days_since_start_deaths_pm = date - start_deaths_pm,
         days_since_start_cases_pm = date - start_cases_pm) 

ggplot(data = joined %>% filter(days_since_start_deaths_pm >= 0),
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkorange', 'purple')) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Days since "start out outbreak"',
       y = 'Deaths per million',
       title = 'Deaths per capita, Italian regions vs. Spanish autonomous communities',
       subtitle = paste0('Day 0 ("start of outbreak") = first day at ', x_deaths_pm, ' or greater cumulative deaths per million'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = joined %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          )),
            aes(x = days_since_start_deaths_pm + 0.6,
                y = deaths_pm,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/italy_comparison.png',
       height = 6,
       width = 10)


# Separate for Catalonia
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(country == 'Catalonia')
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 25) |
                                                              (country == 'Italy' & days_since_start_deaths_pm > 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
      geom_point(data = pdcat %>% filter(date == max(date)),
              aes(color = country),
            alpha = 0.8,
            size = 4) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dies des del "començament del brot"',
       y = 'Morts per milió',
       title = 'Morts per càpita: Catalunya, comunitats autònomes, regions italianes',
       subtitle = paste0('Dia 0 ("començament del brot") = primer dia a ', x_deaths_pm, ' o més morts acumulades per milió de població'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data,
            aes(x = days_since_start_deaths_pm +0.2 ,
                y = deaths_pm +3,
                hjust = 0,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 6,
            alpha = 0.7) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 24)

ggsave('/tmp/cat_italy_comparison.png',
       height = 6,
       width = 10)


# Straightforward Lombardy, Madrid, Cat comparison
specials <- c('Lombardia', 'Madrid')
pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>%
         mutate(country = ifelse(ccaa == 'Cataluña',
                                 'Catalonia',
                                 country)) %>%
  mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa))
pdcat <- pd %>% filter(ccaa %in%  specials)
label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & 
                                                          (
                                                            # (country == 'Catalonia') |
                                                            (country == 'Spain' & deaths_pm > 20) |
                                                              (country == 'Italy' & days_since_start_deaths_pm >= 10)
                                                          ))
ggplot(data = pd,
       aes(x = days_since_start_deaths_pm,
           y = deaths_pm,
           group = ccaa)) +
  geom_line(aes(color = country),
            alpha = 0.3,
            size = 1.5) +
    geom_line(data = pdcat,
              aes(color = country),
            alpha = 0.8,
            size = 2) +
  scale_y_log10() +
  scale_color_manual(name = '',
                     values = c('darkred', 'darkorange', "purple")) +
  theme_simple() +
  theme(legend.position = 'none') +
  labs(x = 'Dias desde "el comienzo del brote"',
       y = 'Muertes por millón de habitantes',
       title = 'Muertes acumuladas por 1.000.000 habitantes',
       subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población'),
       caption = '@joethebrew | www.databrew.cc') +
  geom_text(data = label_data %>% filter(!ccaa %in% specials),
            aes(x = days_since_start_deaths_pm + 0.4,
                y = deaths_pm +3,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 5,
            alpha = 0.5) +
    geom_text(data = label_data %>% filter(ccaa %in% specials),
            aes(x = days_since_start_deaths_pm ,
                y = deaths_pm +30,
                label = gsub(' ', '\n', ccaa),
                color = country),
            size = 8,
            alpha = 0.8) +
  theme(axis.text = element_text(size = 14),
        axis.title = element_text(size = 20)) +
  xlim(0, 23)

ggsave('/tmp/mad_lom_italy_comparison.png',
       height = 6,
       width = 10)

Loop for regions of the world

isos <- sort(unique(world_pop$sub_region))
isos <- c('Central Asia', 'Eastern Asia', 'Eastern Europe',
          'Latin America and the Caribbean',
          'Northern Africa', 'Northern America',
          'Nothern Europe',
          'South-eastern Asia',
          'Southern Asia', 'Southern Europe',
          'Sub-Saharan Africa', 'Western Asia', 'Western Europe')
dir.create('/tmp/world')
for(i in 1:length(isos)){
  this_iso <- isos[i]
  message(i, ' ', this_iso)
  countries <- world_pop %>% filter(sub_region == this_iso)
  pd <- df %>%
    filter(!country %in% c('Guyan
                           a', 'Bahamas', 'The Bahamas')) %>%
          group_by(country, iso, date) %>%
          summarise(cases = sum(cases, na.rm = TRUE)) %>%
    ungroup %>%
    group_by(country) %>%
         filter(length(which(cases > 0)) > 1) %>%
    ungroup %>%
         filter(iso %in% countries$iso)
  if(nrow(pd) > 0){
    cols <- colorRampPalette(brewer.pal(n = 8, 'Spectral'))(length(unique(pd$country)))
cols <- sample(cols, length(cols))
    # Plot
n_countries <- (length(unique(pd$country)))
ggplot(data = pd,
       aes(x = date,
           # color = country,
           # fill = country,
           y = cases)) +
  theme_simple() +
  # geom_point() +
  # geom_line() +
  geom_area(fill = 'darkred', alpha = 0.3, color = 'darkred') +
  # scale_color_manual(name = '',
  #                    values = cols) +
  # scale_fill_manual(name = '',
  #                   values = cols) +
  theme(legend.position = 'none',
        axis.text = element_text(size = 6),
        strip.text = element_text(size = ifelse(n_countries > 20, 6,
                                                ifelse(n_countries > 10, 10,
                                                       ifelse(n_countries > 5, 11, 12))) ),
        legend.text = element_text(size = 6)) +
  # scale_y_log10() +
  facet_wrap(~country,
             scales = 'free') +
  labs(x = '',
       y = 'Confirmed cases',
       title = paste0('Confirmed cases of COVID-19 in ', this_iso)) 
  ggsave(paste0('/tmp/world/', this_iso, '.png'),
         width = 12, 
         height = 7)
  }



}

Rolling average new events

plot_data <- df_country %>% filter(country %in% c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway')) %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

dir.create('/tmp/countries')
roll_curve_country <- function(the_country = 'Spain'){
  plot_data <- df_country %>% filter(country %in% the_country) %>% mutate(geo = country)
  g1 <- roll_curve(plot_data, pop = F)
  g2 <- roll_curve(plot_data, pop = T)
  g3 <- roll_curve(plot_data, pop = F, deaths = T)
  g4 <- roll_curve(plot_data, pop = T, deaths = T)
  ggsave(paste0('/tmp/countries/', the_country, '1.png'), g1)
  ggsave(paste0('/tmp/countries/', the_country, '2.png'), g2)
  ggsave(paste0('/tmp/countries/', the_country, '3.png'), g3)
  ggsave(paste0('/tmp/countries/', the_country, '4.png'), g4)
}


countries <- c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway', 'US', 'United Kingdom', 'Korea, South',
  'China', 'Japan', 'Switzerland', 'Sweden', 'Denmark', 'Netherlands', 'Iran', 'Canada')
for(i in 1:length(countries)){
  roll_curve_country(the_country = countries[i])
}
Error: Can't subset elements that don't exist.
✖ Location 1 doesn't exist.
ℹ There are only 0 elements.
# Cases
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T)

# Deaths
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, deaths = T)

# Cases adjusted
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)
roll_curve(plot_data, pop = T, deaths = T)

plot_data <- esp_df  %>% mutate(geo = ccaa)

roll_curve(plot_data, pop = T, deaths = T)

plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country)

roll_curve(plot_data, deaths = T)

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths | Muertes',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths > 0),
            aes(x = ccaa,
                y = deaths,
                label = paste0(deaths, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 180) +
  ylim(0, max(pd$deaths * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Muertes relativas por CCAA

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths / sum(deaths) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 4.5) +
  ylim(0, max(pd$value) * 1.2)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spai2.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Just yesterday

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)
text_size <- 12

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = deaths_non_cum)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths',
       title = 'COVID-19 deaths in Spain',
       subtitle = paste0('Data only for ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(deaths_non_cum > 0),
            aes(x = ccaa,
                y = deaths_non_cum,
                label = paste0(deaths_non_cum, '\n(',
                               round(p, digits = 1), '%)')),
            size = text_size * 0.3,
            nudge_y = 30) +
  ylim(0, max(pd$deaths_non_cum * 1.1))
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain_non_cum.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

Muertes relativas por CCAA ayer SOLO

# Latest in Spain
pd <- esp_df %>%
  filter(date == max(date)) %>%
  mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100)

pd <- pd %>% left_join(esp_pop)
text_size <- 12
pd$value <- pd$deaths_non_cum / pd$pop * 100000

# deaths
ggplot(data = pd,
       aes(x = ccaa,
           y = value)) +
  geom_bar(stat = 'identity',
           fill = 'black') +
  theme_simple() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(x = '',
       y = 'Deaths per 100,000',
       title = 'COVID-19 deaths per 100.000',
       subtitle = paste0('Data as of ', max(pd$date)),
       caption = 'github.com/databrew/covid19 | joe@databrew.cc') +
  theme(legend.position = 'top',
        legend.text = element_text(size = text_size * 2),
        axis.title = element_text(size = text_size * 2),
        plot.title = element_text(size = text_size * 2.3),
        axis.text.x = element_text(size = text_size * 1.5)) +
  geom_text(data = pd %>% filter(value > 0),
            aes(x = ccaa,
                y = value,
                label = paste0(round(value, digits = 2), '\n(',
                               deaths_non_cum, '\ndeaths)')),
            size = text_size * 0.3,
            nudge_y = 1) +
  ylim(0, max(pd$value) * 1.3)
Error: Aesthetics must be either length 1 or the same as the data (1): x and y

ggsave('/tmp/spain_ayer_adj.png')
Error: Aesthetics must be either length 1 or the same as the data (1): x and y
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'fixed')

ggsave('/tmp/a.png',
       width = 1280 / 150,
       height = 720 / 150)

Loop for everywhere (see desktop)

dir.create('/tmp/ccaas')
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed')  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths.png'),
         width = 1280 / 150,
         height = 720 / 150)
}

# Deaths too
for(i in 1:length(ccaas)){
  this_ccaa <- ccaas[i]
  plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa)
  roll_curve(plot_data, deaths = T, scales = 'fixed', pop = TRUE)  + theme(strip.text = element_text(size = 30))
  ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths_pop.png'),
         width = 1280 / 150,
         height = 720 / 150)
}
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/b.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'free_y')

ggsave('/tmp/c.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla'))
roll_curve(plot_data, deaths = T, scales = 'fixed')

ggsave('/tmp/d.png',
       width = 1280 / 150,
       height = 720 / 150)
plot_data <- df_country %>% filter(country %in% c('Spain', 'Italy', 'Germany', 'France', 'US',
                                                  'China', 'Korea, South', 'Japan', 'Singapore')) %>% mutate(geo = country)
roll_curve(plot_data, scales = 'free_y')

ggsave('/tmp/z.png',
       width = 1280 / 150,
       height = 720 / 150)

World at large

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(cases)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases')

ggsave('~/Videos/update/a.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/a.png'

China vs world

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases))  %>%
  ungroup %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')

ggsave('~/Videos/update/b.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/b.png'

NOn china only

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(cases)) %>%
  filter(country == 'Other countries')  %>%
  ungroup %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Cases',
       title = 'COVID-19 cases, outside of China') 

ggsave('~/Videos/update/c.png',
       width = 1280 / 150,
       height = 720 / 150)
Error in grid.newpage(): could not open file '/home/joebrew/Videos/update/c.png'

Case numbers across countries

plot_day_zero(countries = c('France', 'Germany', 'Italy', 'Spain', 'Switzerland', 'Sweden', 'Norway', 'Netherlands'))

# ggsave('~/Videos/update/d.png',
#        width = 1280 / 150,
#        height = 720 / 150)

World at large - deaths

pd <- df_country %>%
  group_by(date) %>%
  summarise(n = sum(deaths)) %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n)) +
  geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths')

# ggsave('~/Videos/update/e.png',
#        width = 1280 / 150,
#        height = 720 / 150)

China vs world deaths

pd <- df_country %>%
  group_by(date,
           country = ifelse(country == 'China', 'China', 'Other countries')) %>%
  summarise(n = sum(deaths))  %>%
  ungroup %>%
  filter(date < max(date))
ggplot(data = pd,
       aes(x = date,
           y = n,
           color = country)) +
  geom_line(size = 2) +
  # geom_point() +
  theme_simple() +
  labs(x = 'Date',
       y = 'Deaths',
       title = 'COVID-19 deaths') +
  scale_color_manual(name = '',
                     values = c('red', 'black')) +
  theme(legend.text = element_text(size = 25),
        legend.position = 'top')

# ggsave('~/Videos/update/f.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Asian hope

plot_day_zero(countries = c('Korea, South', 'Japan', 'China', 'Singapore'))

# ggsave('~/Videos/update/g.png',
#        width = 1280 / 150,
#        height = 720 / 150)

Since trajectories are very unstable when cases are low, we’ll exclude from our analysis the first few days, and will only count as “outbreak” once a country reaches 150 or more cumulative cases.

# Doubling time
n_cases_start = 150
countries = c('Italy', 'Spain', 'France', 'Germany', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway')
# countries <- sort(unique(df_country$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$cases, na.rm = TRUE) >= n_cases_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(cases)) %>%
      mutate(start_date = min(date[cases >= n_cases_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(cases) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, cases, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt,
                  slope = curve)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
print(done)
# A tibble: 10 x 3
   country        doubling_time  slope
   <chr>                  <dbl>  <dbl>
 1 Italy                   18.3 0.0378
 2 Spain                   14.6 0.0474
 3 France                  16.7 0.0415
 4 Germany                 17.5 0.0397
 5 Italy                   18.3 0.0378
 6 Switzerland             26.1 0.0265
 7 Denmark                 27.6 0.0251
 8 US                      11.1 0.0624
 9 United Kingdom          12.8 0.0541
10 Norway                  36.9 0.0188
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, cases, predicted, doubling_time) %>%
  tidyr::gather(key, value, cases:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

The below chart shows the trajectories in terms of number of cases in Europe in red, and the predicted trajectories in black. The black line assumes that the doubling rate will stay constant.

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Confirmed cases'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Confirmed cases')) +
  geom_line(data = long %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Since Italy is “leading the way”, it’s helpful to also compare each country to Italy. Let’s see that.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = cases) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, cases, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, cases: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Confirmed cases', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Confirmed cases')) +
  geom_line(data = ol %>% filter(key == 'Confirmed cases'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Cases',
       title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

In the above, what’s striking is how many places have trajectories that are worse than Italy’s. Yes, Italy has more cases, but it’s doubling time is less. Either that changes soon, or these other countries will soon have more cases than Italy.

Deaths or cases?

The number of cases is not necessarily the best indicator for the severity of an outbreak of this nature. Why? Because (a) testing rates and protocols are different by place and (b) testing rates are different by time (since health services are changing their approaches as things develop). In other words, when we compare the number of cases by place and time, we are introducing significant bias.

Using deaths to gauge the magnitude of the outbreak is also problematic. Death rates are differential by age, so the number of deaths depends on a country’s population period, or age structure. Also, death rates will be a function of health services, which are not of the same quality every where. And, of course, like cases, we don’t necessarily know about all of the deaths that occur because of COVID-19.

Still, there’s an argument that death rates have less bias than case rates because deaths are easier to identify than cases. Let’s accept that argument, for the time being, and have a look at death rates by country.

# Doubling time
n_deaths_start = 5
countries = c('Italy', 'Spain', 'France', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway', 'Germany')
# countries <- sort(unique(df_country$country))

make_double_time <- function(data = df_country,
                             the_country = 'Spain',
                             n_deaths_start = 5,
                             time_ahead = 7){
   sub_data <-data %>% filter(country == the_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since)) %>%
      mutate(the_weight = 1/(1 + (as.numeric(max(date) - date))))
    fit <- lm(log(deaths) ~ days_since,
              weights = the_weight,
              data = pd) 
    # fitlo <- loess(deaths ~ days_since, data = pd)
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    day0 <- pd$date[pd$days_since == 0]
    fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1))
    fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    # fake$predictedlo <- predict(fitlo, newdata = fake)
    ci <- exp(predict(fit, newdata = fake, interval = 'prediction'))
    # cilo <- predict(fitlo, newdata = fake, interval = 'prediction')

    fake$lwr <- ci[,'lwr']
    fake$upr <- ci[,'upr']
    # fake$lwrlo <- ci[,'lwr']
    # fake$uprlo <- ci[,'upr']
    # Doubling time
    dt <- log(2)/fit$coef[2]
    fake %>% mutate(country = the_country) %>% mutate(doubling_time = dt)
  }
}

plot_double_time <- function(data, ylog = F){
  the_labs <- labs(x = 'Date',
                   y = 'Deaths',
                   title = paste0('Predicted deaths in ', data$country[1]))
  long <- data %>%
    tidyr::gather(key, value, deaths:predicted) %>%
    mutate(key = Hmisc::capitalize(key))
  g <- ggplot() +
        geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Deaths'])),
                aes(x = date,
                    ymax = upr,
                    ymin = lwr),
                alpha =0.6,
                fill = 'darkorange') +
    geom_line(data = long,
              aes(x = date,
                  y = value,
                  group = key,
                  lty = key)) +
    geom_point(data = long %>% filter(key == 'Deaths'),
               aes(x = date,
                   y = value)) +
    theme_simple() +
    theme(legend.position = 'right',
          legend.title = element_blank()) +
    the_labs
  if(ylog){
    g <- g + scale_y_log10()
  }
  return(g)
}
options(scipen = '999')
data <- make_double_time(n_deaths_start = 150, time_ahead = 7)
data
NULL
dir.create('/tmp/ccaa_predictions')

plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')
Error in UseMethod("gather_"): no applicable method for 'gather_' applied to an object of class "NULL"
ggsave('/tmp/ccaa_predictions/spain.png')
# All ccaas
ccaas <- sort(unique(esp_df$ccaa))
for(i in 1:length(ccaas)){
  message(i)
  this_ccaa <- ccaas[i]
  sub_data <- esp_df %>% mutate(country = ccaa) 
  try({
    data <- make_double_time(
    data = sub_data,
    the_country = this_ccaa,
    n_deaths_start = 5,
    time_ahead = 7)
  plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)), assuming no change in growth trajectory since first day at >5 deaths')
  ggsave(paste0('/tmp/ccaa_predictions/',
                this_ccaa, '.png'),
         height = 4.9,
         width = 8.5)
  })

}
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
Error in UseMethod("gather_") : 
  no applicable method for 'gather_' applied to an object of class "NULL"
# all_countries <- sort(unique(df_country$country))
# for(i in 1:length(all_countries)){
#   this_country <- all_countries[i]
#   data <- make_double_time(the_country = this_country, n_deaths_start = 5)
#   if(!is.null(data)){
#     # print(this_country)
#     g <- plot_double_time(data, ylog = F) +
#   labs(subtitle = 'Basic log-linear model assuming no change in growth trajectory since first day at >5 deaths')
#     ggsave(paste0('/tmp/', this_country, '.png'), height = 5, width = 8)
#     print(data)
#   }
# }
counter <- 0
# Africa
data <- make_double_time(the_country = 'South Africa',
                         n_deaths_start = 5, time_ahead = 7)
data
# A tibble: 93 x 8
   days_since date       country      deaths predicted   lwr   upr doubling_time
        <dbl> <date>     <chr>         <dbl>     <dbl> <dbl> <dbl>         <dbl>
 1          0 2020-03-31 South Africa      5      15.3  13.2  17.7          11.6
 2          1 2020-04-01 South Africa      5      16.2  14.0  18.8          11.6
 3          2 2020-04-02 South Africa      5      17.2  14.9  19.9          11.6
 4          3 2020-04-03 South Africa      9      18.3  15.8  21.1          11.6
 5          4 2020-04-04 South Africa      9      19.4  16.8  22.4          11.6
 6          5 2020-04-05 South Africa     11      20.6  17.9  23.7          11.6
 7          6 2020-04-06 South Africa     12      21.9  19.0  25.1          11.6
 8          7 2020-04-07 South Africa     13      23.2  20.2  26.6          11.6
 9          8 2020-04-08 South Africa     18      24.6  21.5  28.2          11.6
10          9 2020-04-09 South Africa     18      26.2  22.8  29.9          11.6
# … with 83 more rows
dir.create('/tmp/africa_predictions')

plot_double_time(data, ylog = T) +
  labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')

out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <-df_country %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  if(ok){
    counter <- counter + 1
    pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1))
    fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)

# Get rid of Italy future (since it's the "leader")
joined <- joined %>%
  filter(country != 'Italy' |
           date <= (Sys.Date() -1))


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'black')
sub_data <-  long %>% filter(country != 'US')
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 cumulative deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15))

Let’s again overlay Italy.

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

cols <- c('red', 'blue', 'black')
sub_data <- ol %>% 
  filter(!(key == 'Predicted (based on current doubling time)' &
             country == 'Spain' &
             days_since > 13))
ggplot(data = sub_data,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = sub_data %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = sub_data %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  geom_point(data = sub_data %>% filter(key == 'Deaths')) +
  geom_line(data = sub_data %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15)) 

Let’s look just at Spain

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Italy',
                         country == 'Spain')
ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>%
  dplyr::select(Italy, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Italy) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', 
                      ifelse(key == 'Deaths', 'Spain', key)))

cols <- c('blue',  'black', 'red')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Italy')),
            size = 0.8, alpha = 0.8) +
  # geom_point(data = ol %>% filter(key == 'Deaths')) +
    geom_point(data = ol %>% filter(country == 'Spain',
                                    key == 'Spain'), size = 4, alpha = 0.6) +

  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  # facet_wrap(~paste0(country, '\n',
  #                    '(doubling time: ', 
  #                    round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,1)) +
  scale_color_manual(name = '',
                     values = cols) +
  scale_y_log10() +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = 13),
          plot.title = element_text(size = 15),
          axis.title = element_text(size = 18))

The importance of lag

Things are changing very rapidly. And measures being taken by these countries will have an impact on the outbreak.

But it’s important to remember that there is a lag between when an intervention takes place and when its effect is notable. Because of the incubation period - the number of days between someone getting infected and becoming sick - what we do today won’t really have an effect until next weekend. And the clinical cases that present today are among people who got infected a week ago.

Disease control measures work. We can see that clearly in the case of Hubei, Wuhan, Iran, Japan. And they will work in Europe too. But because many of these measures were implemented very recently, we won’t likely see a major effect for at least a few more days.

In the mean time, it’s important to practice social distancing. Stay away from others to keep both you and others safe. Listen to Health Authorities. Take this very seriously.

Spain and Italy regions

# Madrid vs Lombardy deaths
n_death_start <- 5
pd <- esp_df %>%
  # filter(ccaa == 'Madrid') %>%
  dplyr::select(date, ccaa, cases, deaths) %>%
  bind_rows(ita %>%
              # filter(ccaa == 'Lombardia') %>%
              dplyr::select(date, ccaa, cases, deaths)) %>%
  arrange(date) %>%
  group_by(ccaa) %>%
  mutate(first_n_death = min(date[deaths >= n_death_start])) %>%
  ungroup %>%
  mutate(days_since_n_deaths = date - first_n_death) %>%
  filter(is.finite(days_since_n_deaths))

pd$country <- pd$ccaa
pd$cases <- pd$cases
countries <- sort(unique(pd$country))
out_list <- curve_list <-  list()
counter <- 0
for(i in 1:length(countries)){
  message(i)
  this_country <- countries[i]
  sub_data <- pd %>% filter(country == this_country)
  # Only calculate on countries with n_cases_start or greater cases,
  # starting at the first day at n_cases_start or greater
  # ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start
  ok <- length(which(sub_data$deaths >= n_deaths_start))
  if(ok){
    counter <- counter + 1
    sub_pd <- sub_data %>%
      filter(!is.na(deaths)) %>%
      mutate(start_date = min(date[deaths >= n_deaths_start])) %>%
      mutate(days_since = date - start_date) %>%
      filter(days_since >= 0) %>%
      mutate(days_since = as.numeric(days_since))
    fit <- lm(log(deaths) ~ days_since, data = sub_pd) 
    # plot(pd$days_since, log(pd$cases))
    # abline(fit)
    ## Slope
    # curve <- fit$coef[2]
    
    # Predict days ahead
    fake <- tibble(days_since = seq(0, max(sub_pd$days_since) + 5, by = 1))
    fake <- left_join(fake, sub_pd %>% dplyr::select(days_since, deaths, date))
    fake$predicted <- exp(predict(fit, newdata = fake))
    
    # Doubling time
    dt <- log(2)/fit$coef[2]
    out <- tibble(country = this_country,
                  doubling_time = dt)
    out_list[[counter]] <- out
    curve_list[[counter]] <- fake %>% mutate(country = this_country)
  }
}
done <- bind_rows(out_list)
curves <- bind_rows(curve_list)
# Get curves back in exponential form
# curves$curve <- exp(curves$curve)

# Join doubling time to curves
joined <- left_join(curves, done)


# Make long format
long <- joined %>% 
  dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>%
  tidyr::gather(key, value, deaths:predicted) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
long <- long %>% filter(!is.na(doubling_time))
text_size <- 12

cols <- c('red', 'black')
ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  geom_line(data = long %>% filter(key != 'Deaths'),
            size = 1.2, alpha = 0.8) +
  geom_point(data = long %>% filter(key == 'Deaths')) +
  geom_line(data = long %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >150 cumulative cases',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Let’s overlay Lombardy

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.5),
          plot.title = element_text(size = 15))

Show only Spanish regions vs. Lombardy

text_size <- 14

# Overlay Italy
ol1 <- joined %>% filter(!country %in% 'Lombardia')
ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>%
  dplyr::select(Lombardia, days_since)
ol <- left_join(ol1, ol2) %>%
  dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time)
ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>%
  mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>%
  mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))

# Remove those with not enough data to have a doubling time yet
ol <- ol %>% filter(!is.na(doubling_time))

# Only Spain
ol <- ol %>% filter(country %in% esp_df$ccaa) %>%
  filter(!country %in% 'Aragón')

cols <- c('red', 'blue', 'black')
ggplot(data = ol,
       aes(x = days_since,
           y = value,
           lty = key,
           color = key)) +
  scale_y_log10() +
  geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Lombardia')),
            size = 1.2, alpha = 0.8) +
    geom_line(data = ol %>% filter(key %in% c('Lombardia')),
            size = 0.5, alpha = 0.8) +
  geom_point(data = ol %>% filter(key == 'Deaths')) +
  geom_line(data = ol %>% filter(key == 'Deaths'),
            size = 0.8) +
  facet_wrap(~paste0(country, '\n',
                     '(doubling time: ', 
                     round(doubling_time, digits = 1), ' days)'), scales = 'free') +
  theme_simple() +
  scale_linetype_manual(name ='',
                        values = c(1,6,2)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  labs(x = 'Days since first day at >5 deaths',
       y = 'Deaths',
       title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    theme(strip.text = element_text(size = text_size * 0.6),
          plot.title = element_text(size = 15))

Same plot but overlayed

Same as above, but overlaid

text_size <-10

# cols <- c('red', 'black')
long <- long %>% filter(country %in% c('Lombardia',
                                       'Emilia Romagna') |
                          country %in% esp_df$ccaa) %>%
  filter(country != 'Aragón')
places <- sort(unique(long$country))

cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 7, 'Spectral'))(length(places))
cols[which(places == 'Madrid')] <- 'red'
cols[which(places == 'Cataluña')] <- 'purple'
cols[which(places == 'Lombardia')] <- 'darkorange'
cols[which(places == 'Emilia Romagna')] <- 'darkgreen'

long$key <- ifelse(long$key != 'Deaths', 'Predicted', long$key)
long$key <- ifelse(long$key == 'Predicted', 'Muertes\nprevistas',
                   'Muertes\nobservadas')


# Keep only Madrid, Lombardy, Emilia Romagna
long <- long %>%
  filter(country %in% c('Madrid',
                        'Lombardia',
                        'Emilia Romagna'))

ggplot(data = long,
       aes(x = days_since,
           y = value,
           lty = key,
           color = country)) +
  geom_point(data = long %>% filter(key == 'Muertes\nobservadas'), size = 2, alpha = 0.8) +
  geom_line(data = long %>% filter(key == 'Muertes\nprevistas'), size = 1, alpha = 0.7) +
  geom_line(data = long %>% filter(key != 'Muertes\nprevistas'), size = 0.8) +
  theme_simple() +
  scale_y_log10() +
  scale_linetype_manual(name ='',
                        values = c(1,4)) +
  scale_color_manual(name = '',
                     values = cols) +
  theme(legend.position = 'top') +
  # labs(x = 'Days since first day at 5 or more cumulative deaths',
  #      y = 'Deaths',
  #      title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
  #      caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
  #      subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
    labs(x = 'Días desde el primer día a 5 o más muertes acumuladas',
       y = 'Muertes (escala logarítmica)',
       title = 'Muertes por COVID-19',
       caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
       subtitle = '(Tasa de crecimiento calculada desde el primer día a 5 o más muertes acumuladas)\n(Muertes "previstas": suponiendo que no hay cambios en la tasa de crecimiento)') +
    theme(strip.text = element_text(size = text_size * 0.75),
          plot.title = element_text(size = text_size * 3),
          legend.text = element_text(size = text_size * 1.5),
          axis.title = element_text(size = text_size * 2),
          axis.text = element_text(size = text_size * 2))

# cols <- c(cols, 'darkorange')
# ggplot(data = ol,
#        aes(x = days_since,
#            y = value,
#            lty = key,
#            color = key)) +
#   scale_y_log10() +
#   geom_line(aes(color = country)) +
#   
#   # geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')),
#   #           size = 1.2, alpha = 0.8) +
#   #   geom_line(data = ol %>% filter(key %in% c('Lombardia')),
#   #           size = 0.5, alpha = 0.8) +
#   # geom_point(data = ol %>% filter(key == 'Deaths')) +
#   # geom_line(data = ol %>% filter(key == 'Deaths'),
#   #           size = 0.8) +
#   theme_simple() +
#   scale_linetype_manual(name ='',
#                         values = c(1,6,2)) +
#   scale_color_manual(name = '',
#                      values = cols) +
#   theme(legend.position = 'top') +
#   labs(x = 'Days since first day at >5 deaths',
#        y = 'Deaths',
#        title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)',
#        caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19',
#        subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') +
#     theme(strip.text = element_text(size = text_size * 1),
#           plot.title = element_text(size = 15))
# Map data preparation

if(!'map.RData' %in% dir()){
  esp1 <- getData(name = 'GADM', country = 'ESP', level = 1)
# Remove canary
esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',]
espf <- fortify(esp1, region = 'NAME_1')

# Standardize names
# Convert names
map_names <- esp1@data$NAME_1
data_names <- sort(unique(esp_df$ccaa))
names_df <- tibble(NAME_1 = c('Andalucía',
 'Aragón',
 'Cantabria',
 'Castilla-La Mancha',
 'Castilla y León',
 'Cataluña',
 'Ceuta y Melilla',
 'Comunidad de Madrid',
 'Comunidad Foral de Navarra',
 'Comunidad Valenciana',
 'Extremadura',
 'Galicia',
 'Islas Baleares',
 'La Rioja',
 'País Vasco',
 'Principado de Asturias',
 'Región de Murcia'),
 ccaa = c('Andalucía',
 'Aragón',
 'Cantabria',
 'CLM',
 'CyL',
 'Cataluña',
 'Melilla',
 'Madrid',
 'Navarra',
 'C. Valenciana',
 'Extremadura',
 'Galicia',
 'Baleares',
 'La Rioja',
 'País Vasco',
 'Asturias',
 'Murcia'))


espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df)
centroids <- data.frame(coordinates(esp1))
names(centroids) <- c('long', 'lat')
centroids$NAME_1 <- esp1$NAME_1
centroids <- centroids %>% left_join(names_df)

# Get random sampling points

  random_list <- list()
  for(i in 1:nrow(esp1)){
    message(i)
    shp <- esp1[i,]
    # bb <- bbox(shp)
    this_ccaa <- esp1@data$NAME_1[i]
    # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2])
    # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2])
    # random_points <- expand.grid(long = xs, lat = ys) %>%
    #   mutate(x = long,
    #          y = lat)
    # coordinates(random_points) <- ~x+y
    # proj4string(random_points) <- proj4string(shp)
    # get ccaa
    message('getting locations of randomly generated points')
    # polys <- over(random_points,polygons(shp))
    # polys <- as.numeric(polys)
    random_points <- spsample(shp, n = 20000, type = 'random')
    random_points <- data.frame(random_points)
    random_points$NAME_1 <-  this_ccaa
    random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1)
    random_list[[i]] <- random_points
  }
  random_points <- bind_rows(random_list)
  random_points <- random_points %>% mutate(long = x,
                                            lat = y)

save(espf,
     esp1,
     names_df,
     centroids,
     random_points,
     file = 'map.RData')
} else {
  load('map.RData')
}

# Define a function for adding zerio
add_zero <- 
  function (x, n) 
  {
    x <- as.character(x)
    adders <- n - nchar(x)
    adders <- ifelse(adders < 0, 0, adders)
    for (i in 1:length(x)) {
      if (!is.na(x[i])) {
        x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), 
                       x[i], collapse = "")
      }
    }
    return(x)
  }
remake_world_map <- FALSE
options(scipen = '999')
if(remake_world_map){
  # World map animation
  world <- map_data('world')
  # world <- ne_countries(scale = "medium", returnclass = "sf")
  
  # Get plotting data
  pd <- df_country %>%
    dplyr::select(date, lng, lat, n = cases)
  dates <- sort(unique(pd$date))
  n_days <- length(dates)
  # # Define vectors for projection
  # vec_lon <- seq(30, -20, length = n_days)
  # vec_lat <- seq(25, 15, length = n_days)
  
  dir.create('animation')
  for(i in 1:n_days){
    message(i, ' of ', n_days)
    this_date <- dates[i]
    # this_lon <- vec_lon[i]
    # this_lat <- vec_lat[i]
    # the_crs <-
    #   paste0("+proj=laea +lat_0=", this_lat,
    #          " +lon_0=",
    #          this_lon,
    #          " +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs ")
    sub_data <- pd %>%
      filter(date == this_date)
    # coordinates(sub_data) <- ~lng+lat
    # proj4string(sub_data) <- proj4string(esp1)
    # # sub_data <- spTransform(sub_data,
    # #                         the_crs)
    # coordy <- coordinates(sub_data)
    # sub_data@data$long <- coordy[,1]
    # sub_data@data$lat <- coordy[,2]
  
    g <- ggplot() +
      geom_polygon(data = world,
                   aes(x = long,
                       y = lat,
                       group = group),
                   fill = 'black',
                   color = 'white',
                   size = 0.1) +
      theme_map() +
          geom_point(data = sub_data %>% filter(n > 0) %>% mutate(Deaths = n),
                 aes(x = lng,
                     y = lat,
                     size = Deaths),
                 color = 'red',
                 alpha = 0.6) +
      geom_point(data = tibble(x = c(0,0), y = c(0,0), Deaths = c(1, 100000)),
                 aes(x = x,
                     y = y,
                     size = Deaths),
                 color = 'red',
                 alpha =0.001) +
      scale_size_area(name = '', breaks = c(100, 1000, 10000, 100000),
                      max_size = 25
                      ) +
    # scale_size_area(name = '', limits = c(1, 10), breaks = c(0, 10, 30, 50, 70, 100, 200, 500)) +
      labs(title = this_date) +
      theme(plot.title = element_text(size = 30),
            legend.text = element_text(size = 15),
            legend.position = 'left')
  
    plot_number <- add_zero(i, 3)
    ggsave(filename = paste0('animation/', plot_number, '.png'),
           plot = g,
           width = 9.5,
           height = 5.1)
  }
  setwd('animation')
  system('convert -delay 30x100 -loop 0 *.png result.gif')
  setwd('..')

}

Maps of Spain

make_map <- function(var = 'deaths',
                     data = NULL,
                     pop = FALSE,
                     pop_factor = 100000,
                     points = FALSE,
                     line_color = 'white',
                     add_names = T,
                     add_values = T,
                     text_size = 2.7){
  
  if(is.null(data)){
    data <- esp_df %>%  mutate(ccaa = cat_transform(ccaa))

  }

  left <- espf %>%   mutate(ccaa = cat_transform(ccaa)) 
  right <- data[,c('ccaa', paste0(var, '_non_cum'))]
  

  names(right)[ncol(right)] <- 'var'
  right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T))
  
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map <- left_join(left, right)
  
  if(points){
    the_points <- centroids %>%
      left_join(right)
    g <- ggplot() +
      geom_polygon(data = map,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'black',
         color = line_color,
         lwd = 0.4, alpha = 0.8) +
      geom_point(data = the_points,
                 aes(x = long,
                     y = lat,
                     size = var),
                 color = 'red',
                 alpha = 0.7) +
      scale_size_area(name = '', max_size = 20)
  } else {
    # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c')
    cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues')
    g <- ggplot(data = map,
         aes(x = long,
             y = lat,
             group = group)) +
    geom_polygon(aes(fill = var),
                 lwd = 0.3,
                 color = line_color) +
      scale_fill_gradientn(name = '',
                           colours = cols)
    # scale_fill_viridis(name = '' ,option = 'magma',
    #                    direction = -1) 
  }
  
  # Add names?
  if(add_names){
    centy <- centroids %>% left_join(right)
    if(add_values){
      centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')')
    } else {
      centy$label <- centy$ccaa
    }

    g <- g +
      geom_text(data = centy,
                aes(x = long,
                    y = lat,
                    label = label,
                    group = ccaa),
                alpha = 0.7,
                size = text_size)
  }
  
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(data$date))) +
    theme(legend.position = 'right')
  
}

make_dot_map <- function(var = 'deaths',
                     date = NULL,
                     pop = FALSE,
                     pop_factor = 100,
                     point_factor = 1,
                     points = FALSE,
                     point_color = 'darkred',
                     point_size = 0.6,
                     point_alpha = 0.5){
  
  
  if(is.null(date)){
    the_date <- max(esp_df$date)
  } else {
    the_date <- date
  }
    right <- esp_df[esp_df$date == the_date,c('ccaa', var)]
   names(right)[ncol(right)] <- 'var'
  if(pop){
    right <- left_join(right, esp_pop)
    right$var <- right$var / right$pop * pop_factor
  }
  map_data <- esp1@data %>%
    left_join(names_df) %>%
      left_join(right)
  map_data$var <- map_data$var / point_factor
  out_list <- list()
  for(i in 1:nrow(map_data)){
    sub_data <- map_data[i,]
    this_value = round(sub_data$var)

    if(this_value >= 1){
      this_ccaa = sub_data$ccaa
      # get some points
      sub_points <- random_points %>% filter(ccaa == this_ccaa)
      sampled_points <- sub_points %>% dplyr::sample_n(this_value)
      out_list[[i]] <- sampled_points
    }
  }
  the_points <- bind_rows(out_list)
  
  g <- ggplot() +
    geom_polygon(data = espf,
         aes(x = long,
             y = lat,
             group = group),
         fill = 'white',
         color = 'black',
         lwd = 0.4, alpha = 0.8) +
    geom_point(data = the_points,
               aes(x = long,
                   y = lat),
               color = point_color,
               size = point_size,
               alpha = point_alpha)
  g +
    theme_map() +
    labs(subtitle = paste0('Data as of ', max(esp_df$date)))
  
}

Deaths

Absolute number of deaths: points

make_map(var = 'deaths',
       points = T) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')
Error in grid.Call.graphics(C_setviewport, vp, TRUE): non-finite location and/or size for viewport

Absolute number of deaths: choropleth

make_map(var = 'deaths',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of deaths',
       caption = '@joethebrew')

Number of deaths adjusted by population: points

make_map(var = 'deaths', pop = TRUE, points = T) +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')
Error in grid.Call.graphics(C_setviewport, vp, TRUE): non-finite location and/or size for viewport

Number of deaths adjusted by population: polygons

make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') +
  labs(title = 'Number of deaths per 100,000',
       caption = '@joethebrew')

Number of deaths: 1 dot per death

make_dot_map(var = 'deaths', point_size = 0.05) +
  labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')
Error in FUN(X[[i]], ...): object 'lat' not found

Cases

Absolute number of cases: points

make_map(var = 'cases',
       points = T) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Absolute number of cases: choropleth

make_map(var = 'cases',
         line_color = 'darkgrey',
       points = F) +
  labs(title = 'Number of confirmed cases',
       caption = '@joethebrew')

Number of cases adjusted by population: points

make_map(var = 'cases', pop = TRUE, points = T) +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases adjusted by population: polygons

make_map(var = 'cases', pop = TRUE, points = F,
         line_color = 'darkgrey') +
  labs(title = 'Number of confirmed cases per 100,000',
       caption = '@joethebrew')

Number of cases: points

make_dot_map(var = 'cases',
             point_size = 0.05, point_alpha = 0.5, point_factor = 10) +
  labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location',
       caption = '@joethebrew')